From f088536bb62305e3980ec436a57ad6d5d7469117 Mon Sep 17 00:00:00 2001 From: "picocreator (Eugene Cheah)" Date: Sat, 20 Jan 2024 05:56:09 +0800 Subject: [PATCH 01/23] 7B baseline --- .../selective-loss/7B-baseline-run-100k.ipynb | 17253 ++++++++++++++++ 1 file changed, 17253 insertions(+) create mode 100644 notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb new file mode 100644 index 00000000..917bd6d9 --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb @@ -0,0 +1,17253 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 7B Baseline Run\n", + "\n", + "Without any experimental tweaks" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss\n", + "TRAINER_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "WANDB_PREFIX=\"infctx-v5-selective-loss\"\n", + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# The model sizing\n", + "MODEL_NAME=\"RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-01-20 05:49:01-- https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\n", + "Resolving huggingface.co (huggingface.co)... 13.33.33.20, 13.33.33.110, 13.33.33.55, ...\n", + "Connecting to huggingface.co (huggingface.co)|13.33.33.20|:443... connected.\n", + "HTTP request sent, awaiting response... 
" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "302 Found\n", + "Location: https://cdn-lfs.huggingface.co/repos/20/96/209600910186b29a71a230a87a5a555fd33bc07c57dabd6f6f9fd03523c5326f/1bc5c11ab576f130752f004698fd45e278094c671a4f6c067aeeeee9ebe74a48?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27RWKV-5-World-7B-v2-OnlyForTest_72%2525_trained-20231204-ctx4096.pth%3B+filename%3D%22RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth%22%3B&Expires=1705960141&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNTk2MDE0MX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yMC85Ni8yMDk2MDA5MTAxODZiMjlhNzFhMjMwYTg3YTVhNTU1ZmQzM2JjMDdjNTdkYWJkNmY2ZjlmZDAzNTIzYzUzMjZmLzFiYzVjMTFhYjU3NmYxMzA3NTJmMDA0Njk4ZmQ0NWUyNzgwOTRjNjcxYTRmNmMwNjdhZWVlZWU5ZWJlNzRhNDg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=IxiX31-ebesq21xv4qkvpg6mAPXpnSh1MNiaba%7E%7EXEuvqBUd8jCh1SAwRKOnTUbJrQRye6AKOZg1AvrxYfHDP7dj4%7EUs0KCF-W0H4dFEe1MZv4mpE9xKG7FNaqStUtOh-XMSbpj7s30eWnhiUJTQMkCb%7EAutrFreWzNDNSNey6d%7EBxkurlmKxVQmNSKHfTSIN-dMvVQuf9SD6PGKbyPR8lwzvisEaJ4tSF%7EVgTf2lBbeFrbFWXOoWkRqF-j%7EmjWEzP-fCfTqmFBqA8f3P62jeXrhu-QqhAh5Id3bN25P29OVgJVl6jgyfkix-SaN%7EDkwDS5QxYhguKnyamkxIRaDtQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\n", + "--2024-01-20 05:49:01-- https://cdn-lfs.huggingface.co/repos/20/96/209600910186b29a71a230a87a5a555fd33bc07c57dabd6f6f9fd03523c5326f/1bc5c11ab576f130752f004698fd45e278094c671a4f6c067aeeeee9ebe74a48?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27RWKV-5-World-7B-v2-OnlyForTest_72%2525_trained-20231204-ctx4096.pth%3B+filename%3D%22RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth%22%3B&Expires=1705960141&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNTk2MDE0MX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yMC85Ni8yMDk2MDA5MTAxODZiMjlhNzFhMjMwYTg3YTVhNTU1ZmQzM2JjMDdjNTdkYWJkNmY2ZjlmZDAzNTIzYzUzMjZmLzFiYzVjMTFhYjU3NmYxMzA3NTJmMDA0Njk4ZmQ0NWUyNzgwOTRjNjcxYTRmNmMwNjdhZWVlZWU5ZWJlNzRhNDg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=IxiX31-ebesq21xv4qkvpg6mAPXpnSh1MNiaba%7E%7EXEuvqBUd8jCh1SAwRKOnTUbJrQRye6AKOZg1AvrxYfHDP7dj4%7EUs0KCF-W0H4dFEe1MZv4mpE9xKG7FNaqStUtOh-XMSbpj7s30eWnhiUJTQMkCb%7EAutrFreWzNDNSNey6d%7EBxkurlmKxVQmNSKHfTSIN-dMvVQuf9SD6PGKbyPR8lwzvisEaJ4tSF%7EVgTf2lBbeFrbFWXOoWkRqF-j%7EmjWEzP-fCfTqmFBqA8f3P62jeXrhu-QqhAh5Id3bN25P29OVgJVl6jgyfkix-SaN%7EDkwDS5QxYhguKnyamkxIRaDtQ__&Key-Pair-Id=KVTP0A1DKRTAX\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 13.33.33.45, 13.33.33.93, 13.33.33.119, ...\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|13.33.33.45|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n",
+ "Length: 15036197229 (14G) [binary/octet-stream]\n",
+ "Saving to: ‘RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth?download=true’\n",
+ "\n",
+ " RWKV-5-World-7B 42%[=======> ] 5.99G 11.8MB/s eta 9m 11s "
+ ]
+ }
+ ],
+ "source": [
+ "# Let's wget the model files\n",
+ "!cd \"{PROJECT_DIR}\" && cd \"./model\" && \\\n",
+ " wget -nc \"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Map (num_proc=160): 100%|███| 1000000/1000000 [00:10<00:00, 95906.26 examples/s]\n",
+ "Filter (num_proc=160): 100%|█| 1000000/1000000 [00:05<00:00, 179927.28 examples/\n",
+ "Map (num_proc=160): 100%|█████| 120800/120800 [00:03<00:00, 37916.96 examples/s]\n",
+ "Map (num_proc=160): 100%|█████| 120800/120800 [00:05<00:00, 21204.90 examples/s]\n",
+ "Saving the dataset (4/4 shards): 100%|█| 18147/18147 [00:04<00:00, 4124.48 examp\n",
+ "Saving the dataset (1/1 shards): 100%|█| 13423/13423 [00:00<00:00, 25885.58 exam\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Let's preload the required dataset \n",
+ "!cd \"{TRAINER_DIR}\" && \\\n",
+ " python3 preload_datapath.py \"{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-packing.yaml\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Multi-epoch training"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[2024-01-19 02:10:43,455] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+ "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n",
+ "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/enwiki_100k-world-16k-packing.yaml', '--model.load_model=../model/L8-D512-world-v5-init.pth', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - Baseline (trainsize=2k,packsize=16k) - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--trainer.devices=auto'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/enwiki_100k-world-16k-packing.yaml', '--model.load_model=../model/L8-D512-world-v5-init.pth', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - Baseline (trainsize=2k,packsize=16k) - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--trainer.devices=auto'].\n",
+ "Seed set to 3941088705\n",
+ "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. 
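
A quick note on the `[RWKV.Trainer] Applying 'target_batch_size'` block that appears a little further down in this log: the trainer derives its gradient-accumulation factor from the requested batch size. A minimal sketch of that arithmetic using the logged values (the variable names mirror the log output, not the trainer's internal code):

```python
# Minimal sketch of the trainer's batch accounting, using the logged values.
# Names mirror the log output; this is not the trainer's actual code.
target_batch_size = 128  # samples consumed per optimizer step
num_nodes = 1
num_devices = 8          # GPUs participating in the run
microbatch_size = 8      # samples per GPU per forward/backward pass

samples_per_pass = num_nodes * num_devices * microbatch_size      # 64
accumulate_grad_batches = target_batch_size // samples_per_pass   # 2
effective_batch_size = accumulate_grad_batches * samples_per_pass # 128

assert effective_batch_size == target_batch_size  # matches the log
```

Each optimizer step therefore accumulates gradients over 2 forward/backward passes of 64 samples, which is why the log reports `accumulate_grad_batches: 2` and an `effective_batch_size` of 128.
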
It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 128\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 2\n", + " - effective_batch_size: 128\n", + "\n", + "[rank: 0] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-19 02:10:48,616] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 02:10:48,647] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 02:10:48,737] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 02:10:48,740] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 02:10:48,746] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 02:10:48,816] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 02:10:48,909] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[rank: 6] Seed set to 3941088705\n", + "[rank: 4] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 1] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "[rank: 7] Seed set to 3941088705\n", + "[rank: 5] Seed set to 3941088705\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "Detected CUDA files, patching ldflags\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "[rank: 3] Seed set to 3941088705\n", + "[rank: 2] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 6] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 4] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 1] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 3] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 7] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 2] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 5] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240119_021100-bzrubpww\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33minfctx-v5-selective-loss - Baseline (trainsize=2k,packsize=16k) - (deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss/runs/bzrubpww\u001b[0m\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:639: Checkpoint directory /home/recursal/RWKV-infctx-trainer/checkpoint/selective-loss/baseline exists and is not empty.\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze 
issues\n",
+ "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for better clarifications\n",
+ "# - When resuming from checkpoint, the estimated time is inaccurate\n",
+ "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "\n",
+ "[RWKV.model] Configuring optimizer with\n",
+ " - lr_init: 8.000e-04 (0.0008)\n",
+ " - lr_final: 3.000e-04 (0.0003)\n",
+ "\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Detected CUDA files, patching ldflags\n",
+ "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n",
+ "Building extension module fused_adam...\n",
+ "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
+ "ninja: no work to do.\n",
+ "Loading extension module fused_adam...\n",
+ "Time to load fused_adam op: 0.0694894790649414 seconds\n",
+ "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
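
The `[RWKV.model] Configuring optimizer` block above anneals the learning rate from `lr_init` 8e-4 down to `lr_final` 3e-4. The exact decay shape is the trainer's choice; a small illustrative helper, assuming a simple linear interpolation over training progress (only the two endpoint values come from the log):

```python
def annealed_lr(progress: float, lr_init: float = 8e-4, lr_final: float = 3e-4) -> float:
    """Linearly interpolate the learning rate over training progress in [0, 1].

    Illustrative sketch only: the infctx trainer may use a different decay
    shape (e.g. exponential); the endpoints match the logged lr_init / lr_final.
    """
    progress = min(max(progress, 0.0), 1.0)
    return lr_init + (lr_final - lr_init) * progress

print(annealed_lr(0.0))  # ~0.0008 at the start of training
print(annealed_lr(0.5))  # ~0.00055 halfway through
print(annealed_lr(1.0))  # ~0.0003 at the end
```
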
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10149145126342773 seconds\n", + "Time to load fused_adam op: 0.10158634185791016 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10157179832458496 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10195612907409668 seconds\n", + "Time to load fused_adam op: 0.1018822193145752 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10197162628173828 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10252904891967773 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 33.6 M\n", + "1 | blocks | ModuleList | 27.3 M\n", + "2 | ln_out | LayerNorm | 1.0 K \n", + "3 | head | Linear | 33.6 M\n", + "--------------------------------------\n", + "94.4 M Trainable params\n", + "0 Non-trainable params\n", + "94.4 M Total params\n", + "377.725 Total estimated model params size (MB)\n", + "Epoch 0: 100%|██| 284/284 [09:24<00:00, 0.50it/s, v_num=bpww, train/loss=5.940]\n", + "Validation: | | 0/? [00:00 Date: Sat, 20 Jan 2024 06:02:10 +0800 Subject: [PATCH 02/23] wip 7b baseline --- .../rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb index 917bd6d9..30846972 100644 --- a/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb @@ -82,7 +82,7 @@ "Length: 15036197229 (14G) [binary/octet-stream]\n", "Saving to: ‘RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth?download=true’\n", "\n", - " RWKV-5-World-7B 42%[=======> ] 5.99G 11.8MB/s eta 9m 11s " + "=true 78%[==============> ] 11.06G 15.7MB/s eta 3m 25s " ] } ], From c00a27eff686051c27c912ef9b0ec0c4ee0dd7b5 Mon Sep 17 00:00:00 2001 From: "picocreator (Eugene Cheah)" Date: Sat, 20 Jan 2024 06:06:25 +0800 Subject: [PATCH 03/23] sloss perf --- .../selective-loss/sloss-run-100k.ipynb | 38612 +++++++++++++++- 1 file changed, 38602 insertions(+), 10 deletions(-) diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/sloss-run-100k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/sloss-run-100k.ipynb index 3593220a..91c370be 100644 --- a/notebook/rwkv-x-exp/v5-exp/selective-loss/sloss-run-100k.ipynb +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/sloss-run-100k.ipynb @@ -11,9 +11,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" + ] + } + ], "source": [ "GPU_DEVICES=\"auto\"\n", "ENABLE_WANDB=True\n", @@ -45,9 +57,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-19 05:28:33,486] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to 
cuda (auto detect)\n",
+ "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
+ "---- Initializing model ----\n",
+ "No of layers: 8\n",
+ "Embedding size: 512\n",
+ "Output model path: ../model/L8-D512-world-v5-init.pth\n",
+ "Vocab size: 65536\n",
+ "Emb scale: 0.0001\n",
+ "Note: this process takes a significant time (and ram) for large models\n",
+ "---- ----- ----\n",
+ "Model exists, skipping init_model\n"
+ ]
+ }
+ ],
 "source": [
 "# Let's initialize the model with the init_model.py code\n",
 "!cd \"{TRAINER_DIR}\" && python3 init_model.py \\\n",
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
 "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Map (num_proc=255): 100%|███| 1000000/1000000 [00:38<00:00, 25944.52 examples/s]\n",
+ "Filter (num_proc=255): 100%|█| 1000000/1000000 [00:16<00:00, 60006.93 examples/s\n",
+ "Map (num_proc=255): 100%|██████| 120800/120800 [00:16<00:00, 7317.51 examples/s]\n",
+ "Map (num_proc=255): 100%|██████| 120800/120800 [00:18<00:00, 6598.04 examples/s]\n",
+ "Saving the dataset (4/4 shards): 100%|█| 18201/18201 [00:01<00:00, 12987.11 exam\n",
+ "Saving the dataset (1/1 shards): 100%|█| 13423/13423 [00:00<00:00, 40290.59 exam\n"
+ ]
+ }
+ ],
 "source": [
 "# Let's preload the required dataset \n",
 "!cd \"{TRAINER_DIR}\" && \\\n",
 " python3 preload_datapath.py \"{NOTEBOOK_DIR}/config/sloss-enwiki_100k-world-16k-packing.yaml\""
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "# Multi-epoch training"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
- "outputs": [],
+ "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
 "[2024-01-19 05:33:08,562] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
 "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
 "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/sloss-enwiki_100k-world-16k-packing.yaml', '--model.load_model=../model/L8-D512-world-v5-init.pth', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - Selective Loss (trainsize=2k,packsize=16k) - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--trainer.devices=auto'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/sloss-enwiki_100k-world-16k-packing.yaml', '--model.load_model=../model/L8-D512-world-v5-init.pth', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - Selective Loss (trainsize=2k,packsize=16k) - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--trainer.devices=auto'].\n",
 "Seed set to 3941088705\n",
 "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
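
The `init_model.py` output above pins this test model at 8 layers, an embedding width of 512, and the 65536-token world vocabulary, which lines up with the parameter summary Lightning prints later in these logs (33.6 M `emb`, 33.6 M `head`, 27.3 M `blocks`, 94.4 M total). A back-of-envelope check (illustrative only; the per-block split depends on the RWKV v5 block internals):

```python
# Back-of-envelope check of the logged parameter counts (illustrative).
vocab_size = 65536  # "Vocab size: 65536" in the init_model output
d_model = 512       # "Embedding size: 512"
n_layer = 8         # "No of layers: 8"

emb_params = vocab_size * d_model   # 33,554,432 -> the "33.6 M" emb row
head_params = d_model * vocab_size  # untied output head, same shape
print(f"emb : {emb_params / 1e6:.1f} M")
print(f"head: {head_params / 1e6:.1f} M")
# The remaining ~27.3 M parameters sit in the 8 stacked blocks,
# i.e. roughly 3.4 M per block for the time-mix and channel-mix weights.
```
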
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 128\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 2\n", + " - effective_batch_size: 128\n", + "\n", + "[rank: 0] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-19 05:33:17,448] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:17,449] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:17,449] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:17,450] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:18,395] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:18,409] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:18,450] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 1] Seed set to 3941088705\n", + "[rank: 6] Seed set to 3941088705\n", + "[rank: 5] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. 
This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 4] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 7] Seed set to 3941088705\n", + "[rank: 3] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 2] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. 
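
The repeated `wkv5` build messages above come from PyTorch's JIT C++/CUDA extension loader: it emits a ninja build file under the extensions cache, compiles once, and later ranks find the cached build (hence `ninja: no work to do.`). A rough sketch of that loading pattern; the source file names and the `-D` define below are assumptions for illustration, not the trainer's exact invocation:

```python
# Rough sketch of JIT-loading a CUDA kernel the way the log shows.
# Only the module name, HEAD_SIZE=64, and the ninja-driven flow come
# from the log; the paths and flags below are hypothetical.
from torch.utils.cpp_extension import load

HEAD_SIZE = 64
wkv5 = load(
    name="wkv5",
    sources=["cuda/wkv5_op.cpp", "cuda/wkv5_cuda.cu"],      # hypothetical paths
    verbose=True,
    extra_cuda_cflags=["-O3", f"-DHEAD_SIZE={HEAD_SIZE}"],  # hypothetical flag
)
# A second load() with the same name reuses the cached ninja build,
# which is why later ranks report "ninja: no work to do."
```
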
It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 6] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 4] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 1] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 5] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 2] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 7] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 3] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240119_053338-0tw1sln7\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33minfctx-v5-selective-loss - Selective Loss (trainsize=2k,packsize=16k) - (deepspeed_stage_1)\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss/runs/0tw1sln7\u001b[0m\n",
+ "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "#\n",
+ "# RWKV lighting_trainer.py important notes \n",
+ "# https://github.com/RWKV/RWKV-infctx-trainer \n",
+ "#\n",
+ "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n",
+ "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for better clarifications\n",
+ "# - When resuming from checkpoint, the estimated time is inaccurate\n",
+ "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "\n",
+ "\n",
+ "[RWKV.model] Configuring optimizer with\n",
+ " - lr_init: 8.000e-04 (0.0008)\n",
+ " - lr_final: 3.000e-04 (0.0003)\n",
+ "\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a 
performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Creating extension directory /root/.cache/torch_extensions/py310_cu121/fused_adam...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "[1/3] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/includes -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam -isystem /usr/local/lib/python3.10/dist-packages/torch/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.10/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_89,code=compute_89 -gencode=arch=compute_89,code=sm_89 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_89,code=sm_89 -gencode=arch=compute_89,code=compute_89 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", + "[2/3] c++ -MMD -MF fused_adam_frontend.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/includes -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam -isystem /usr/local/lib/python3.10/dist-packages/torch/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.10/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DBF16_AVAILABLE -c /usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam/fused_adam_frontend.cpp -o 
fused_adam_frontend.o \n", + "[3/3] c++ fused_adam_frontend.o multi_tensor_adam.cuda.o -shared -L/usr/local/lib/python3.10/dist-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart -o fused_adam.so\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 21.555976390838623 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 21.641409158706665 seconds\n", + "Time to load fused_adam op: 21.641443490982056 seconds\n", + "Time to load fused_adam op: 21.64131236076355 seconds\n", + "Time to load fused_adam op: 21.64161515235901 seconds\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 21.640255212783813 seconds\n", + "Time to load fused_adam op: 21.6422917842865 seconds\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 21.64272451400757 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n",
+ " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n",
+ "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n",
+ " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n",
+ "\n",
+ " | Name | Type | Params\n",
+ "--------------------------------------\n",
+ "0 | emb | Embedding | 33.6 M\n",
+ "1 | blocks | ModuleList | 27.3 M\n",
+ "2 | ln_out | LayerNorm | 1.0 K \n",
+ "3 | head | Linear | 33.6 M\n",
+ "--------------------------------------\n",
+ "94.4 M Trainable params\n",
+ "0 Non-trainable params\n",
+ "94.4 M Total params\n",
+ "377.725 Total estimated model params size (MB)\n",
+ "Epoch 0: 100%|██| 285/285 [06:40<00:00, 0.71it/s, v_num=sln7, train/loss=5.280]\n",
+ "Validation: | | 0/? [00:00
Date: Sat, 20 Jan 2024 14:28:30 +0800
Subject: [PATCH 04/23] 7B baseline

---
 .../selective-loss/7B-baseline-run-100k.ipynb | 17105 +---------------
 1 file changed, 12 insertions(+), 17093 deletions(-)

diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb
index 30846972..0f6ad0ef 100644
--- a/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb
+++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb
@@ -30,7 +30,7 @@
 "GPU_DEVICES=\"auto\"\n",
 "ENABLE_WANDB=True\n",
 "WANDB_PREFIX=\"infctx-v5-selective-loss\"\n",
- "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n",
+ "DEEPSPEED_STRAT=\"deepspeed_stage_2\"\n",
 "\n",
 "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
 "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
@@ -41,7 +41,7 @@
 " WANDB_MODE=\"disabled\"\n",
 "\n",
 "# The model sizing\n",
- "MODEL_NAME=\"RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth\"\n",
+ "MODEL_NAME=\"RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth\"\n",
 "\n",
 "# Computing the notebook, and various paths\n",
 "import os\n",
@@ -82,14 +82,17 @@
 "Length: 15036197229 (14G) [binary/octet-stream]\n",
 "Saving to: ‘RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth?download=true’\n",
 "\n",
- "=true 78%[==============> ] 11.06G 15.7MB/s eta 3m 25s "
+ "RWKV-5-World-7B-v2- 100%[===================>] 14.00G 13.5MB/s in 16m 12s \n",
+ "\n",
+ "2024-01-20 06:05:14 (14.8 MB/s) - ‘RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth?download=true’ saved [15036197229/15036197229]\n",
+ "\n"
 ]
 }
 ],
 "source": [
 "# Let's wget the model files\n",
 "!cd \"{PROJECT_DIR}\" && cd \"./model\" && \\\n",
- " wget -nc \"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\""
+ " wget -O \"{MODEL_NAME}\" -nc 
\"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\"" ] }, { @@ -127,17085 +130,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2024-01-19 02:10:43,455] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/enwiki_100k-world-16k-packing.yaml', '--model.load_model=../model/L8-D512-world-v5-init.pth', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - Baseline (trainsize=2k,packsize=16k) - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--trainer.devices=auto'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/enwiki_100k-world-16k-packing.yaml', '--model.load_model=../model/L8-D512-world-v5-init.pth', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - Baseline (trainsize=2k,packsize=16k) - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--trainer.devices=auto'].\n", - "Seed set to 3941088705\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n", - "\n", - "\n", - "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", - " - target_batch_size: 128\n", - " - num_nodes: 1\n", - " - num_devices: 8\n", - " - microbatch_size: 8\n", - " - accumulate_grad_batches: 2\n", - " - effective_batch_size: 128\n", - "\n", - "[rank: 0] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", - "[2024-01-19 02:10:48,616] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-19 02:10:48,647] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-19 02:10:48,737] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-19 02:10:48,740] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-19 02:10:48,746] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-19 02:10:48,816] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-19 02:10:48,909] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[rank: 6] Seed set to 3941088705\n", - "[rank: 4] Seed set to 3941088705\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "[rank: 1] Seed set to 3941088705\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "[rank: 7] Seed set to 3941088705\n", - "[rank: 5] Seed set to 3941088705\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "---\n", - "Detected CUDA files, patching ldflags\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "[rank: 3] Seed set to 3941088705\n", - "[rank: 2] Seed set to 3941088705\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "Loading extension module wkv5...\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "[rank: 6] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", - "[rank: 4] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", - "[rank: 1] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", - "[rank: 3] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", - "[rank: 7] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", - "[rank: 2] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", - "[rank: 5] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", - "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240119_021100-bzrubpww\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33minfctx-v5-selective-loss - Baseline (trainsize=2k,packsize=16k) - (deepspeed_stage_1)\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss/runs/bzrubpww\u001b[0m\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:639: Checkpoint directory /home/recursal/RWKV-infctx-trainer/checkpoint/selective-loss/baseline exists and is not empty.\n", - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "#\n", - "# RWKV lighting_trainer.py important notes \n", - "# https://github.com/RWKV/RWKV-infctx-trainer \n", - "#\n", - "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", - "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", - "# - When resuming from checkpoint, the estimated time is inaccurate\n", - "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "\n", - "[RWKV.model] Configuring optimizer with\n", - " - lr_init: 8.000e-04 (0.0008)\n", - " - lr_final: 3.000e-04 (0.0003)\n", - "\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "[WARNING]: unlimited bptt_learning_range across multiple 
GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", - "Building extension module fused_adam...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.0694894790649414 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.10149145126342773 seconds\n", - "Time to load fused_adam op: 0.10158634185791016 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10157179832458496 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10195612907409668 seconds\n", - "Time to load fused_adam op: 0.1018822193145752 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Loading extension module fused_adam...\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.10197162628173828 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10252904891967773 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "\n", - " | Name | Type | Params\n", - "--------------------------------------\n", - "0 | emb | Embedding | 33.6 M\n", - "1 | blocks | ModuleList | 27.3 M\n", - "2 | ln_out | LayerNorm | 1.0 K \n", - "3 | head | Linear | 33.6 M\n", - "--------------------------------------\n", - "94.4 M Trainable params\n", - "0 Non-trainable params\n", - "94.4 M Total params\n", - "377.725 Total estimated model params size (MB)\n", - "Epoch 0: 100%|██| 284/284 [09:24<00:00, 0.50it/s, v_num=bpww, train/loss=5.940]\n", - "Validation: | | 0/? [00:00 Date: Sat, 20 Jan 2024 23:12:58 +0000 Subject: [PATCH 05/23] Fixed DS 3? 
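This change routes the loss of the final learning segment through the trainer's default backward pass, instead of issuing one more `manual_backward` call, which removes the backward pass that was previously "wasted" at the end and appears to be what DeepSpeed stage 3 needs. A minimal sketch of the resulting control flow, using plain torch tensors in place of the Lightning module; `fake_segment_losses` is a hypothetical stand-in for the per-segment losses produced by `checkpointed_step`, and the names mirror the diff below:

    import torch

    def accumulate_segment_losses(fake_segment_losses, start_learning_segment,
                                  backward_segment_count, gradient_accumulation_steps):
        # fake_segment_losses: hypothetical stand-in for per-segment losses
        # from checkpointed_step. Mirrors the branching in the diff below:
        # only segments inside the learning window contribute gradients,
        # while every segment contributes to the reported training loss.
        training_loss = torch.tensor(0.0)
        last = start_learning_segment + backward_segment_count - 1
        for i, segment_train_loss in enumerate(fake_segment_losses):
            if start_learning_segment <= i <= last:
                if i == last:
                    # Last learning segment: keep the graph attached, so the
                    # framework's default backward pass consumes it (the fix).
                    training_loss = training_loss + segment_train_loss
                else:
                    # Earlier segments: backward immediately (the trainer uses
                    # self.manual_backward(..., retain_graph=True) here), then
                    # accumulate a detached copy so no loss is counted twice.
                    learning_loss = segment_train_loss / gradient_accumulation_steps
                    learning_loss.backward(retain_graph=True)
                    training_loss = training_loss + segment_train_loss.detach()
            else:
                # Outside the learning window: track the loss, no gradients.
                training_loss = training_loss + segment_train_loss.detach()
        return training_loss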
---
 RWKV-v5/src/model.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/RWKV-v5/src/model.py b/RWKV-v5/src/model.py
index 779b348a..db95a2ce 100644
--- a/RWKV-v5/src/model.py
+++ b/RWKV-v5/src/model.py
@@ -1139,13 +1139,18 @@ def checkpointed_step(idx, targets, mask, last_shift_states,
                     # https://lightning.ai/docs/pytorch/2.0.4/common/lightning_module.html#backward
                     learning_loss = segment_train_loss / gradient_accumulation_steps
 
-                    # Undocumented multiple backward pass support
-                    # https://github.com/Lightning-AI/lightning/blob/678f642808c54e4c490caee4df5d357301c976bb/tests/trainer/optimization/test_manual_optimization.py#L251
-                    self.manual_backward(learning_loss, optimizer, retain_graph=True)
-
-                    # Accumulate without gradient, as we already did the backward pass
-                    # This does mean, that a single backward pass is "wasted" at the end
-                    training_loss = training_loss + segment_train_loss.clone().detach().requires_grad_(False)
+                    # Perform the backward pass accordingly, for valid segments (besides the last segment)
+                    if i == start_learning_segment + backward_segment_count - 1:
+                        # This is the last backward pass, we let the default pytorch lightning handle the backward pass
+                        # and return the segment loss as part of the total loss
+                        training_loss = training_loss + segment_train_loss
+                    else:
+                        # Undocumented multiple backward pass support
+                        # https://github.com/Lightning-AI/lightning/blob/678f642808c54e4c490caee4df5d357301c976bb/tests/trainer/optimization/test_manual_optimization.py#L251
+                        self.manual_backward(learning_loss, optimizer, retain_graph=True)
+
+                        # Accumulate without gradient, as we already did the backward pass
+                        training_loss = training_loss + segment_train_loss.clone().detach().requires_grad_(False)
                 else:
                     # Even if its not the segments we use for backward pass, we still need to accumulate the loss
                     training_loss = training_loss + segment_train_loss.clone().detach().requires_grad_(False)
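Both the selective-loss runs above and the unit-test notebook below print a `[RWKV.Trainer] Applying 'target_batch_size'` banner. The logged values are consistent with the relationship sketched here, an inference from the logs rather than the trainer's actual code: the gradient accumulation count is derived from the requested target, and the effective batch size is recomputed from it, so it can overshoot whenever a single step already exceeds the target.

    # Sketch of how the banner's numbers appear to relate (inferred, hypothetical):
    def batch_size_plan(target_batch_size, num_nodes, num_devices, microbatch_size):
        # Samples processed per optimizer micro-step across all GPUs
        per_step = num_nodes * num_devices * microbatch_size
        # Accumulate enough micro-steps to approximate the target, never below 1
        accumulate_grad_batches = max(1, round(target_batch_size / per_step))
        effective_batch_size = accumulate_grad_batches * per_step
        return accumulate_grad_batches, effective_batch_size

    print(batch_size_plan(128, 1, 8, 8))  # (2, 128) - matches the 7B baseline run above
    print(batch_size_plan(16, 1, 8, 8))   # (1, 64)  - matches the unit-test run below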
"PROJECT_DIR: /home/recursal/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=False\n", + "WANDB_PREFIX=\"infctx-v5-unit-test\"\n", + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# First lets setup the various directories\n", + "!mkdir -p \"{PROJECT_DIR}/model/\"\n", + "!mkdir -p \"{PROJECT_DIR}/datapath/\"\n", + "!mkdir -p \"{PROJECT_DIR}/checkpoint/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-20 23:13:08,834] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "---- Initializing model ----\n", + "No of layers: 6\n", + "Embedding size: 512\n", + "Output model path: ../model/L6-D512-world-init.pth\n", + "Vocab size: 65536\n", + "Emb scale: 0.0001\n", + "Note: this process takes a significant time (and ram) for large models\n", + "---- ----- ----\n", + "Model exists, skipping init_model\n" + ] + } + ], + "source": [ + "# Lets initialized the L6-D512 model with the init_model.py code\n", + "!cd \"{TRAINER_DIR}\" && python3 init_model.py \\\n", + " --n_layer 6 --n_embd 512 \\\n", + " --vocab_size world \\\n", + " --skip-if-exists --safe-init \\\n", + " ../model/L6-D512-world-init.pth" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Map (num_proc=160): 100%|███████| 10000/10000 [00:00<00:00, 10822.35 examples/s]\n", + "Filter (num_proc=160): 100%|████| 10000/10000 [00:00<00:00, 15584.16 examples/s]\n", + "Map (num_proc=160): 100%|███████████| 1339/1339 [00:02<00:00, 496.73 examples/s]\n", + "Map (num_proc=160): 100%|█████████████| 690/690 [00:00<00:00, 996.37 examples/s]\n", + "Saving the dataset (1/1 shards): 100%|█| 690/690 [00:00<00:00, 10690.47 examples\n", + "Saving the dataset (1/1 shards): 100%|███| 7/7 [00:00<00:00, 2719.79 examples/s]\n" + ] + } + ], + "source": [ + "# Preload the dataset\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/config/enwiki_10k-world-4x1024.yaml\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Empty out the checkpoint\n", + "!cd \"{PROJECT_DIR}\" && rm -rf \"./checkpoint/infctx-v5-unit-test-baseline-4x1024/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deepspeed based test\n", + "\n", + "Lets re-run everything with additional deepspeed variations\n", + "\n", + "**Deepspeed 1**" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-20 23:13:32,907] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/trainer-v5-unit-test/config/enwiki_10k-world-4x1024.yaml', '--trainer.logger.init_args.name=infctx-v5-unit-test (train-ctx=1024, data-ctx=4096, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.load_model=../model/L6-D512-world-init.pth'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/trainer-v5-unit-test/config/enwiki_10k-world-4x1024.yaml', '--trainer.logger.init_args.name=infctx-v5-unit-test (train-ctx=1024, data-ctx=4096, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.load_model=../model/L6-D512-world-init.pth'].\n", + "Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 16\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 1\n", + " - effective_batch_size: 64\n", + "\n", + "[2024-01-20 23:13:38,019] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-20 23:13:38,034] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-20 23:13:38,041] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-20 23:13:38,053] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-20 23:13:38,162] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-20 23:13:38,257] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-20 23:13:38,259] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[rank: 7] Seed set to 3941088705\n", + "[rank: 2] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 3] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 5] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "[rank: 6] Seed set to 3941088705\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 1] Seed set to 3941088705\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 4] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Saving the dataset (1/1 shards): 100%|█| 690/690 [00:00<00:00, 1784.86 examples/\n", + "Saving the dataset (1/1 shards): 100%|███| 7/7 [00:00<00:00, 2061.95 examples/s]\n", + "[rank: 0] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[rank: 7] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 3] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 2] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 5] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 6] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 4] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 1] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "Enabling DeepSpeed BF16. 
Model parameters and inputs will be cast to `bfloat16`.\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using 
/home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.06023287773132324 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10158467292785645 seconds\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.1016077995300293 seconds/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10178995132446289 secondsTime to load fused_adam op: 0.10172677040100098 seconds\n", + "\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10198116302490234 seconds\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10247421264648438 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10244083404541016 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 33.6 M\n", + "1 | blocks | ModuleList | 20.5 M\n", + "2 | ln_out | LayerNorm | 1.0 K \n", + "3 | head | Linear | 33.6 M\n", + "--------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 91%|███▋| 10/11 [00:11<00:01, 0.85it/s, v_num=bekp, train/loss=7.560]/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|████| 11/11 [00:13<00:00, 0.81it/s, v_num=bekp, train/loss=7.660]\n", + "Validation: | | 0/? 
[00:00
+ "#\n",
+ "# RWKV lighting_trainer.py important notes \n",
+ "# https://github.com/RWKV/RWKV-infctx-trainer \n",
+ "#\n",
+ "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n",
+ "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n",
+ "# - When resuming from checkpoint, the estimated time is inaccurate\n",
+ "#LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "\n",
+ "\n",
+ "[RWKV.model] Configuring optimizer with\n",
+ "    - lr_init:  8.000e-04 (0.0008)\n",
+ "    - lr_final: 4.000e-04 (0.0004)\n",
+ "\n",
+ "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n",
+ "Detected CUDA files, patching ldflags\n",
+ "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n",
+ "Building extension module fused_adam...\n",
+ "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.06238269805908203 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10197782516479492 seconds\n", + "Time to load fused_adam op: 0.10209941864013672 secondsLoading extension module fused_adam...\n", + "\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10231184959411621 seconds\n", + "Time to load fused_adam op: 0.10195589065551758 seconds\n", + "Time to load fused_adam op: 0.10258698463439941 seconds\n", + "Time to load fused_adam op: 0.10204219818115234 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.1022348403930664 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 33.6 M\n", + "1 | blocks | ModuleList | 20.5 M\n", + "2 | ln_out | LayerNorm | 1.0 K \n", + "3 | head | Linear | 33.6 M\n", + "--------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 91%|███▋| 10/11 [00:11<00:01, 0.85it/s, v_num=lsx7, train/loss=7.560]/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|████| 11/11 [00:13<00:00, 0.81it/s, v_num=lsx7, train/loss=7.620]\n", + "Validation: | | 0/? [00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Creating extension directory /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using 
/home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam/build.ninja...\n", + "Building extension module cpu_adam...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "[1/4] /home/recursal/miniconda3/envs/rwkv-infctx/bin/nvcc -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/includes -I/home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/TH -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/THC -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_89,code=compute_89 -gencode=arch=compute_89,code=sm_89 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++17 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_89,code=sm_89 -gencode=arch=compute_89,code=compute_89 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -c /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/common/custom_cuda_kernel.cu -o custom_cuda_kernel.cuda.o \n", + "[2/4] c++ -MMD -MF cpu_adam_impl.o.d -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/includes -I/home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/TH -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/THC -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -L/home/recursal/miniconda3/envs/rwkv-infctx/lib -lcudart -lcublas -g -march=native -fopenmp -D__AVX256__ -D__ENABLE_CUDA__ -DBF16_AVAILABLE -c /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/adam/cpu_adam_impl.cpp -o cpu_adam_impl.o \n", + "[3/4] c++ -MMD -MF cpu_adam.o.d -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/includes -I/home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include -isystem 
/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/TH -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/THC -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -L/home/recursal/miniconda3/envs/rwkv-infctx/lib -lcudart -lcublas -g -march=native -fopenmp -D__AVX256__ -D__ENABLE_CUDA__ -DBF16_AVAILABLE -c /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/adam/cpu_adam.cpp -o cpu_adam.o \n", + "[4/4] c++ cpu_adam.o cpu_adam_impl.o custom_cuda_kernel.cuda.o -shared -lcurand -L/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/home/recursal/miniconda3/envs/rwkv-infctx/lib -lcudart -o cpu_adam.so\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.55697989463806 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.60743737220764 seconds\n", + "Loading extension module cpu_adam...\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.604738235473633 seconds\n", + "Time to load cpu_adam op: 29.611495971679688 seconds\n", + "Loading extension module cpu_adam...\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.617934703826904 seconds\n", + "Time to load cpu_adam op: 29.6148624420166 seconds\n", + "Loading extension module cpu_adam...\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.61309814453125 seconds\n", + "Time to load cpu_adam op: 29.62095355987549 seconds\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 33.6 M\n", + "1 | blocks | ModuleList | 20.5 M\n", + "2 | ln_out | LayerNorm | 1.0 K \n", + "3 | head | Linear | 33.6 M\n", + "--------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 0%| | 0/11 [00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.05372977256774902 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...Loading extension module fused_adam...\n", + "\n", + "Time to load fused_adam op: 0.10151505470275879 seconds\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam.../home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + "Time to load fused_adam op: 0.10130453109741211 seconds\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10213708877563477 seconds\n", + "Time to load fused_adam op: 0.10188078880310059 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10168170928955078 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10216069221496582 seconds\n", + "Time to load fused_adam op: 0.10223817825317383 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Parameter Offload: Total persistent parameters: 45056 in 88 params\n", + "\n", + " | Name | Type | Params | Params per Device\n", + "----------------------------------------------------------\n", + "0 | emb | Embedding | 33.6 M | 4.2 M \n", + "1 | blocks | ModuleList | 20.5 M | 2.6 M \n", + "2 | ln_out | LayerNorm | 1.0 K | 128 \n", + "3 | head | Linear | 33.6 M | 4.2 M \n", + "----------------------------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 0%| | 0/11 [00:00) and the tensors embedded in it cannot be detected. 
The ZeRO-3 hooks designed to trigger before or after backward pass of the module relies on knowing the input and output tensors and therefore may not get triggered properly.\n", + "Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 9%|▍ | 1/11 [00:02<00:22, 0.45it/s, v_num=9uto, train/loss=11.20]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 18%|▉ | 2/11 [00:03<00:16, 0.53it/s, v_num=9uto, train/loss=10.30]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 27%|█▎ | 3/11 [00:05<00:14, 0.57it/s, v_num=9uto, train/loss=9.810]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 36%|█▊ | 4/11 [00:06<00:11, 0.59it/s, v_num=9uto, train/loss=9.380]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 45%|██▎ | 5/11 [00:08<00:09, 0.60it/s, v_num=9uto, train/loss=8.940]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 55%|██▋ | 6/11 [00:09<00:08, 0.61it/s, v_num=9uto, train/loss=8.810]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 64%|███▏ | 7/11 [00:11<00:06, 0.62it/s, v_num=9uto, train/loss=8.440]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 73%|███▋ | 8/11 [00:12<00:04, 0.62it/s, v_num=9uto, train/loss=8.120]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 82%|████ | 9/11 [00:14<00:03, 0.63it/s, v_num=9uto, train/loss=8.000]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 91%|███▋| 10/11 [00:15<00:01, 0.63it/s, v_num=9uto, train/loss=7.720]/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/strategies/deepspeed.py:758: When saving the DeepSpeed Stage 3 checkpoint, each worker will save a shard of the checkpoint within a directory. If a single file is required after training, see https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-3-single-file for instructions.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 100%|████| 11/11 [00:17<00:00, 0.62it/s, v_num=9uto, train/loss=7.750]\n", + "Validation: | | 0/? 
[00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam/build.ninja...\n", + "Building extension module cpu_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.4077200889587402 seconds\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam/build.ninja...\n", + "Building extension module cpu_adam...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "ninja: no work to do.\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.4514763355255127 seconds\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam/build.ninja...\n", + "Building extension module cpu_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.514955759048462 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5427651405334473 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5546679496765137 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5623202323913574 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5747101306915283 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5695078372955322 seconds\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Parameter Offload: Total persistent parameters: 45056 in 88 params\n", + "\n", + " | Name | Type | Params | Params per Device\n", + "----------------------------------------------------------\n", + "0 | emb | Embedding | 33.6 M | 4.2 M \n", + "1 | blocks | ModuleList | 20.5 M | 2.6 M \n", + "2 | ln_out | LayerNorm | 1.0 K | 128 \n", + "3 | head | Linear | 33.6 M | 4.2 M \n", + "----------------------------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 0%| | 0/11 [00:00) and the tensors embedded in it cannot be detected. The ZeRO-3 hooks designed to trigger before or after backward pass of the module relies on knowing the input and output tensors and therefore may not get triggered properly.\n", + "Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 9%|▍ | 1/11 [00:02<00:26, 0.37it/s, v_num=jhog, train/loss=11.20]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 18%|▉ | 2/11 [00:04<00:20, 0.44it/s, v_num=jhog, train/loss=10.30]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 27%|█▎ | 3/11 [00:06<00:17, 0.47it/s, v_num=jhog, train/loss=9.810]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 36%|█▊ | 4/11 [00:08<00:14, 0.48it/s, v_num=jhog, train/loss=9.380]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 45%|██▎ | 5/11 [00:10<00:12, 0.49it/s, v_num=jhog, train/loss=8.940]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 55%|██▋ | 6/11 [00:11<00:09, 0.50it/s, v_num=jhog, train/loss=8.810]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 64%|███▏ | 7/11 [00:13<00:07, 0.50it/s, v_num=jhog, train/loss=8.500]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 73%|███▋ | 8/11 [00:15<00:05, 0.51it/s, v_num=jhog, train/loss=8.120]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 82%|████ | 9/11 [00:17<00:03, 0.51it/s, v_num=jhog, train/loss=8.000]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 91%|███▋| 10/11 [00:19<00:01, 0.52it/s, v_num=jhog, train/loss=7.720]/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/strategies/deepspeed.py:758: When saving the DeepSpeed Stage 3 checkpoint, each worker will save a shard of the checkpoint within a directory. 
If a single file is required after training, see https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-3-single-file for instructions.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 100%|████| 11/11 [00:21<00:00, 0.51it/s, v_num=jhog, train/loss=7.750]\n", + "Validation: | | 0/? 
[00:00
Date: Sun, 21 Jan 2024 01:54:17 +0000
Subject: [PATCH 07/23] 7B benchmarking

---
 .../selective-loss/7B-baseline-run-100k.ipynb |   5 +-
 .../7B-sxm-benchmark.ipynb                    | 190 +++++++++++
 .../config/enwiki_100k-world-16k-packing.yaml | 298 ++++++++++++++++++
 3 files changed, 491 insertions(+), 2 deletions(-)
 create mode 100644 notebook/trainer-v5-validation/7B-sxm-benchmark.ipynb
 create mode 100644 notebook/trainer-v5-validation/config/enwiki_100k-world-16k-packing.yaml

diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb
index 0f6ad0ef..83500da9 100644
--- a/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb
+++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/7B-baseline-run-100k.ipynb
@@ -91,8 +91,9 @@
    ],
    "source": [
     "# Lets wget the model files\n",
-    "!cd \"{PROJECT_DIR}\" && cd \"./model\" && \\\n",
-    " wget -o \"{MODEL_NAME}\" -nc \"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\""
+    "!mkdir -p \"{PROJECT_DIR}/model\"\n",
+    "!cd \"{PROJECT_DIR}/model\" && \\\n",
+    " wget -O \"{MODEL_NAME}\" -nc \"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\""
    ]
   },
   {
diff --git a/notebook/trainer-v5-validation/7B-sxm-benchmark.ipynb b/notebook/trainer-v5-validation/7B-sxm-benchmark.ipynb
new file mode 100644
index 00000000..de8e75f0
--- /dev/null
+++ b/notebook/trainer-v5-validation/7B-sxm-benchmark.ipynb
@@ -0,0 +1,190 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 7B SXM-based benchmark\n",
+    "\n",
+    "The following benchmarks 7B training on 8 x 80GB VRAM NVIDIA cards,\n",
+    "with the following settings:\n",
+    "- 16k data pack size\n",
+    "- 4k training context length\n",
+    "- microbatch 10\n",
+    "\n",
+    "The following are the expected per-GPU numbers:\n",
+    "\n",
+    "| GPU Model | Deepspeed 2 | Deepspeed 3 |\n",
+    "|-----------|-------------|-------------|\n",
+    "| H100 SXM  | 7 kT/s      | -           |\n",
+    "| H100 PCIe | 4.2 kT/s    | -           |\n",
+    "| A100 SXM  | 3 kT/s      | 2.6 kT/s    |\n",
+    "| A100 PCIe | -           | -           |\n",
+    "| H800 SXM* | 7 kT/s      | -           |\n",
+    "\n",
+    "H800 is the \"China safe export\" edition of the H100; its numbers come from the RWKV-LM repo (not the infctx repo), and are left here for reference."
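As a quick sanity check on the table above, here is a rough back-of-envelope sketch in Python (an illustration, not part of the benchmark notebook itself; it assumes "kT/s" means thousands of tokens per second per GPU, and that every microbatch row packs a full 4k context):

```python
# Rough throughput math for the benchmark table (assumptions noted above).
NUM_GPUS = 8
CTX_LEN = 4096        # 4k training context length
MICROBATCH_SIZE = 10  # samples per GPU per forward/backward pass

# Per-GPU rates from the DeepSpeed stage 2 column of the table (kT/s).
PER_GPU_KTS = {"H100 SXM": 7.0, "H100 PCIe": 4.2, "A100 SXM": 3.0}

tokens_per_microstep = NUM_GPUS * MICROBATCH_SIZE * CTX_LEN  # 327,680 tokens
for gpu, kts in PER_GPU_KTS.items():
    total_tok_per_sec = NUM_GPUS * kts * 1_000
    secs = tokens_per_microstep / total_tok_per_sec
    print(f"{gpu}: ~{total_tok_per_sec:,.0f} tok/s aggregate, ~{secs:.1f}s per micro-step")
```

Under these assumptions, an 8 x H100 SXM node sustains roughly 56k tokens/s in aggregate, i.e. about 6 seconds per micro-step at these settings.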
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /home/recursal/RWKV-infctx-trainer/notebook/trainer-v5-validation\n", + "TRAINER_DIR: /home/recursal/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /home/recursal/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "WANDB_PREFIX=\"infctx-v5-selective-loss\"\n", + "DEEPSPEED_STRAT=\"deepspeed_stage_2\"\n", + "\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# The model sizing\n", + "MODEL_NAME=\"RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-01-20 05:49:01-- https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\n", + "Resolving huggingface.co (huggingface.co)... 13.33.33.20, 13.33.33.110, 13.33.33.55, ...\n", + "Connecting to huggingface.co (huggingface.co)|13.33.33.20|:443... connected.\n", + "HTTP request sent, awaiting response... 
" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "302 Found\n", + "Location: https://cdn-lfs.huggingface.co/repos/20/96/209600910186b29a71a230a87a5a555fd33bc07c57dabd6f6f9fd03523c5326f/1bc5c11ab576f130752f004698fd45e278094c671a4f6c067aeeeee9ebe74a48?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27RWKV-5-World-7B-v2-OnlyForTest_72%2525_trained-20231204-ctx4096.pth%3B+filename%3D%22RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth%22%3B&Expires=1705960141&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNTk2MDE0MX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yMC85Ni8yMDk2MDA5MTAxODZiMjlhNzFhMjMwYTg3YTVhNTU1ZmQzM2JjMDdjNTdkYWJkNmY2ZjlmZDAzNTIzYzUzMjZmLzFiYzVjMTFhYjU3NmYxMzA3NTJmMDA0Njk4ZmQ0NWUyNzgwOTRjNjcxYTRmNmMwNjdhZWVlZWU5ZWJlNzRhNDg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=IxiX31-ebesq21xv4qkvpg6mAPXpnSh1MNiaba%7E%7EXEuvqBUd8jCh1SAwRKOnTUbJrQRye6AKOZg1AvrxYfHDP7dj4%7EUs0KCF-W0H4dFEe1MZv4mpE9xKG7FNaqStUtOh-XMSbpj7s30eWnhiUJTQMkCb%7EAutrFreWzNDNSNey6d%7EBxkurlmKxVQmNSKHfTSIN-dMvVQuf9SD6PGKbyPR8lwzvisEaJ4tSF%7EVgTf2lBbeFrbFWXOoWkRqF-j%7EmjWEzP-fCfTqmFBqA8f3P62jeXrhu-QqhAh5Id3bN25P29OVgJVl6jgyfkix-SaN%7EDkwDS5QxYhguKnyamkxIRaDtQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\n", + "--2024-01-20 05:49:01-- https://cdn-lfs.huggingface.co/repos/20/96/209600910186b29a71a230a87a5a555fd33bc07c57dabd6f6f9fd03523c5326f/1bc5c11ab576f130752f004698fd45e278094c671a4f6c067aeeeee9ebe74a48?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27RWKV-5-World-7B-v2-OnlyForTest_72%2525_trained-20231204-ctx4096.pth%3B+filename%3D%22RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth%22%3B&Expires=1705960141&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNTk2MDE0MX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yMC85Ni8yMDk2MDA5MTAxODZiMjlhNzFhMjMwYTg3YTVhNTU1ZmQzM2JjMDdjNTdkYWJkNmY2ZjlmZDAzNTIzYzUzMjZmLzFiYzVjMTFhYjU3NmYxMzA3NTJmMDA0Njk4ZmQ0NWUyNzgwOTRjNjcxYTRmNmMwNjdhZWVlZWU5ZWJlNzRhNDg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=IxiX31-ebesq21xv4qkvpg6mAPXpnSh1MNiaba%7E%7EXEuvqBUd8jCh1SAwRKOnTUbJrQRye6AKOZg1AvrxYfHDP7dj4%7EUs0KCF-W0H4dFEe1MZv4mpE9xKG7FNaqStUtOh-XMSbpj7s30eWnhiUJTQMkCb%7EAutrFreWzNDNSNey6d%7EBxkurlmKxVQmNSKHfTSIN-dMvVQuf9SD6PGKbyPR8lwzvisEaJ4tSF%7EVgTf2lBbeFrbFWXOoWkRqF-j%7EmjWEzP-fCfTqmFBqA8f3P62jeXrhu-QqhAh5Id3bN25P29OVgJVl6jgyfkix-SaN%7EDkwDS5QxYhguKnyamkxIRaDtQ__&Key-Pair-Id=KVTP0A1DKRTAX\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 13.33.33.45, 13.33.33.93, 13.33.33.119, ...\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|13.33.33.45|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n",
+    "Length: 15036197229 (14G) [binary/octet-stream]\n",
+    "Saving to: ‘RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth?download=true’\n",
+    "\n",
+    "RWKV-5-World-7B-v2- 100%[===================>]  14.00G  13.5MB/s    in 16m 12s \n",
+    "\n",
+    "2024-01-20 06:05:14 (14.8 MB/s) - ‘RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth?download=true’ saved [15036197229/15036197229]\n",
+    "\n"
+   ]
+  }
+ ],
+ "source": [
+    "# Let's wget the model files\n",
+    "!mkdir -p \"{PROJECT_DIR}/model\"\n",
+    "!cd \"{PROJECT_DIR}/model\" && \\\n",
+    " wget -O \"{MODEL_NAME}\" -nc \"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\""
+ ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Map (num_proc=160): 100%|███| 1000000/1000000 [00:10<00:00, 95906.26 examples/s]\n",
+      "Filter (num_proc=160): 100%|█| 1000000/1000000 [00:05<00:00, 179927.28 examples/\n",
+      "Map (num_proc=160): 100%|█████| 120800/120800 [00:03<00:00, 37916.96 examples/s]\n",
+      "Map (num_proc=160): 100%|█████| 120800/120800 [00:05<00:00, 21204.90 examples/s]\n",
+      "Saving the dataset (4/4 shards): 100%|█| 18147/18147 [00:04<00:00, 4124.48 examp\n",
+      "Saving the dataset (1/1 shards): 100%|█| 13423/13423 [00:00<00:00, 25885.58 exam\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's preload the required dataset\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    " python3 preload_datapath.py \"{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-packing.yaml\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Actual training run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    " python3 lightning_trainer.py fit \\\n",
+    " -c \"{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-packing.yaml\" \\\n",
+    " --model.load_model=\"../model/{MODEL_NAME}\" \\\n",
+    " --data.skip_datapath_setup=True \\\n",
+    " --trainer.callbacks.init_args.dirpath=\"../checkpoint/v5-7b-benchmark/baseline/\" \\\n",
+    " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - 7B - Baseline (packsize=16k, {DEEPSPEED_STRAT})\" \\\n",
+    " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    " --trainer.target_batch_size=640 \\\n",
+    " --trainer.microbatch_size=10 \\\n",
+    " --model.ctx_len=4096 \\\n",
+    " --trainer.devices=\"{GPU_DEVICES}\""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "rwkv-infctx",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebook/trainer-v5-validation/config/enwiki_100k-world-16k-packing.yaml b/notebook/trainer-v5-validation/config/enwiki_100k-world-16k-packing.yaml
new file mode 100644
index 00000000..31ea7eed
--- /dev/null
+++ b/notebook/trainer-v5-validation/config/enwiki_100k-world-16k-packing.yaml
@@ -0,0 +1,298 @@
+# lightning.pytorch==2.0.2
+seed_everything: 3941088705
+trainer:
+
+  #
+  # Configure the deepspeed strategy; we recommend you start with `deepspeed_stage_2_offload`
+  # and adjust from there according to your training needs.
+  # `deepspeed_stage_3_offload` is useful for training LoRA on large models on a single GPU.
+  #
+  # In general you would want to use the following:
+  #
+  # - deepspeed_stage_1 : Each of your GPUs has too much VRAM, and you do not know what to do
+  #
+  # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple GPUs, each with sufficient VRAM
+  # - deepspeed_stage_2_offload : Reduce VRAM usage by offloading the optimizer state and work to the CPU
+  #
+  # - deepspeed_stage_3 : Split up the model across multiple GPUs, useful for large models, at a performance cost
+  # - deepspeed_stage_3_offload : Additional offloading, for an even greater performance cost
+  #
+  # For more details see:
+  # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2
+  #
+  strategy: deepspeed_stage_2
+
+  # Logger settings for wandb; if you want to enable wandb, uncomment the whole logger section
+  # ---
+  logger:
+    class_path: lightning.pytorch.loggers.WandbLogger
+    init_args:
+      name: 'infctx-v5-unit-test-baseline (train-ctx=4096, data-ctx=16k-packing)'
+      project: 'RWKV-X-SLoss'
+      tags: ['RWKV', 'infctx']
+
+  # Checkpoint settings for the training process
+  callbacks:
+    class_path: lightning.pytorch.callbacks.ModelCheckpoint
+    init_args:
+      # Configure this to the path you want to save your checkpoints to
+      # note that a subdir will be created with the name `epoch=x-step=y.ckpt`
+      #
+      # To convert a checkpoint to a model, you can use the
+      # `python3 export_checkpoint.py ` script,
+      # which will create a `rwkv_model.pth` in the checkpoint directory.
+      #
+      # Do not use the `zero_to_fp32.py` script, as that will have export format issues
+      dirpath: ../checkpoint/selective-loss/baseline
+      filename: null
+
+      # Save the top/last K checkpoints
+      save_top_k: 3
+      # Choose by the most recent checkpoints (step based)
+      monitor: 'step'
+      mode: max
+
+      # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt',
+      # useful to simplify checkpoint resume scripts, at the price of disk performance
+      save_last: false
+
+      # DO NOT set this as true, as the model weight exported will have format issues
+      # Export as checkpoint, and use the `export_checkpoint.py` script to convert to a model instead
+      save_weights_only: false
+
+      # How frequently you want to save a checkpoint, for every step.
+      # This will happen for every X data samples, where X = every_n_train_steps * accumulate_grad_batches
+      #
+      # In general you will want to avoid putting a low number (especially if accumulate_grad_batches <= 100)
+      # as the checkpoint process will pause all GPU training for some time, slowing down the overall run
+      # However you do not want to configure too high a number either, or you will lose too much progress if the training crashes
+      every_n_train_steps: null
+      every_n_epochs: 1
+      save_on_train_epoch_end: true
+      train_time_interval: null
+
+      # Other settings, you can probably leave alone
+      verbose: false
+      auto_insert_metric_name: true
+
+  ########################################
+  ## Training run parameter settings
+  ########################################
+
+  # Generally what you want to configure is the maximum number of epochs
+  # Leave it as -1, and it will keep going forever until interrupted
+  # Or set it as a number, and it will stop after that number of epochs
+  max_epochs: -1
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  max_time: null
+
+  # Number of data samples to train on for each step. A data sample is counted as
+  # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step"
+  #
+  # This decides the number of data samples to learn from together, before backpropagating
+  # any weight changes at the end of the batch.
+  #
+  # Recommended to be a big enough number (like 128/256) so that it prevents the training
+  # loss from fluctuating in the process. But not too big a number, where the increased
+  # GPU VRAM / offloaded RAM usage will cause the training to crash.
+  #
+  # You are also recommended to configure this to a large enough number to fully utilize
+  # your GPU processing time %, and avoid idle time for the GPU between batches
+  #
+  # This number is divided by the number of GPUs and nodes configured
+  # So if you have 4 GPUs, and 2 nodes, and this is configured as 128,
+  # each GPU will process 128/4/2 = 16 data samples per step, via accumulate_grad_batches
+  target_batch_size: 16
+
+########################################
+## Training model settings
+########################################
+model:
+  # Model to start the finetune/training process from
+  load_model: ../model/L24-D2048-world-v5base-init.pth
+
+  # Context length to use for the training process
+  # the larger the number (and batch size), the larger the VRAM usage
+  #
+  # Note that if the data sample context length is larger than the ctx_len
+  # its training process will be split into ctx_len-sized chunks.
+  #
+  # This allows the training of extremely large context lengths (eg. 100k),
+  # without eating up too much VRAM, by keeping the training context length
+  # to a reasonable number suitable to the current GPU setup
+  ctx_len: 2048
+
+  # Data samples will be cut down to the respective max ctx_len_cutoffs
+  # values if they are larger than ctx_len. If the data sample is larger than
+  # the largest len_cutoff, the remaining data will be discarded
+  ctx_len_cutoffs: []
+  # Experimental settings, number of tokens to skip in the data sample
+  # prefix, for the respective cutoff length. Used to speed up the process
+  ctx_len_warmup_steps: []
+
+  # Learning rate of the training process
+  # ---
+
+  # Initial learning rate of the process
+  lr_init: 8e-4
+  # Final learning rate after the learning rate period
+  # the learning rate will stay at the final value from then onwards
+  lr_final: 3e-4
+
+  # Number of epochs to reduce the learning rate from lr_init to lr_final
+  # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards)
+  # 0 means lr_final will apply immediately
+  # -1 means we take the current max_step / max_epoch as the period
+  lr_period: 2
+  # lr_period type if it is set, defaults to epoch
+  lr_period_type: epoch
+
+  # Adam optimizer settings
+  # You probably want to leave this alone, unless you know what you are doing
+  beta1: 0.9
+  beta2: 0.99
+  adam_eps: 1.0e-08
+  weight_decay: 0.01
+
+  # torch.set_float32_matmul_precision, used to optimize operations with tensor cores
+  # this should be set as null for GPUs without CUDA tensor cores
+  torch_set_float32_matmul_precision: 'high'
+  # torch_set_float32_matmul_precision: null
+
+  # Segment-based learning, used to work around training of large context lengths
+  # beyond what can be supported by the current GPU VRAM architecture
+  #
+  # This is not 1:1 equivalent to the same training process with the required VRAM,
+  # as the training process is split into multiple segments, part by part,
+  # with limited learning carried over from the previous segment.
+  bptt_learning: true
+
+  # Segment range to perform backprop learning on
+  # 1 means to apply only for the last segment
+  # -1 means to apply for all segments
+  bptt_learning_range: -1
+
+data:
+  # Skip the datapath setup
+  # ignored if using the preload_datapath.py
+  skip_datapath_setup: True
+
+  # dataset_path for the prebuilt dataset, using HF `load_from_disk()`
+  #
+  # Use this if you have built your own dataset and saved it with `save_to_disk()`
+  # with source left as null. Otherwise configure this to a directory in which the
+  # dataset will be built and tokenized by the huggingface dataset process.
+  data_path: ../datapath/enwiki_100k-world-16k-packing/
+
+  # Otherwise provide the source path, which is used as the huggingface dataset path
+  # this will be used to populate the dataset_path
+  #
+  # Use either the following
+  # - hugging face dataset
+  # - Directory path to a directory containing dataset files
+  # - Path to a single dataset file
+  # - hugging face dataset mode (ie: text,csv,etc - use data_dir to configure the path then)
+  # - null
+  #
+  # If source is disabled, all other params, except data_path, are ignored
+  source: "teven/enwiki_100k"
+  # source: text
+  # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt
+
+  # Use data_dir, if you are using source=text/json/etc
+  # this should be relative to the trainer script path
+  source_data_dir: null
+
+  # After loading the dataset, split out test data used for unit-tests.
+  # This process is skipped if the dataset includes a test split
+  # This process is skipped if set to zero
+  test_split: 0.10
+  test_split_shuffle: true
+
+  # Tokenizer to use: either the inbuilt 'neox' or 'world' tokenizer
+  # If using a custom tokenizer, provide the tokenizer file path
+  # ---
+  tokenizer: world
+
+  # Minimum / Maximum token size of the data samples to use
+  # useful for filtering out small noisy data samples from large datasets
+  # (eg. removal of small articles of less than 512 tokens from wikipedia)
+  #
+  # This is ignored, if set to -1
+  min_token_size: 1024
+  max_token_size: 16384
+
+  # Rechunking of a text dataset, this is done only when source is set as 'text'
+  # and will merge the various sentences into larger chunks, up to the target size
+  #
+  # Defaults to 4096
+  #
+  # This is ignored, if source is not set as text
+  # This is ignored, if set to zero
+  # ---
+  # text_rechunk_size: 4096
+
+  # Apply text rechunk to the dataset, even if it is not a 'text' source
+  # This is done only after dataset filtering, and if source is not 'text'
+  # ---
+  # text_rechunk_force: true
+
+  # Custom text column to use, useful for datasets with alternative training column labels
+  # This is checked before multi column merging, default is null (disabled)
+  # eg: 'code'
+  # ---
+  # custom_text_key: 'code'
+
+  # Multi Column merging process, default setting is used to support and merge
+  # "instruction", "input", "output" datasets. To disable, set multi_column_keys to []
+  #
+  # A minimum of 2 columns is required, with non-empty data, for the merge to occur
+  # If no match is found, this will fall back to the default prompt/completion or text column,
+  # or throw an error if the default fallback is not found
+  # ---
+  # multi_column_keys: ['instruction', 'input', 'output']
+  # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n']
+  # multi_column_train_mask: [true, false, true]
+  # multi_column_separator: '\n\n'
+
+  # If processing prompt/completion jsonl pairs, the prompt is masked by default
+  # use this flag to disable this default behaviour
+  # ---
+  # disable_prompt_completion_mask: false
+
+  # ----------------------------
+  # Dataset packing support
+  # Recommended for finetuning with mixed document sizes
+  # For training a foundation model "from scratch", rechunking is typically used instead
+  # ----------------------------
+
+  # Boolean flag to enable / disable dataset packing
+  packing_enable: True
+
+  # Used to ensure all training samples within this batch size are the same length
+  # Ideally this should align exactly with your real "batch size"
+  #
+  # Uses `8 * (3 * 4 * 5 * 6 * 7) = 20160` by default, as it should align across
+  # a large number of batch size combinations. This helps reduce the number of
+  # misaligned batches, and thus the amount of wasted training time.
+  packing_batchsize: 128
+
+  # Chunking size to align within each batch, this ideally should be equal to
+  # the training context length used.
+  packing_chunksize: 4096
+
+  # Minimum size to pack up to, this should be a multiple of packing_chunksize
+  # defaults to -1, which equals packing_chunksize
+  packing_min_ctx_len: 16384
+
+  # Pack the data sequentially if possible, in accordance with the dataset sequence
+  # this can be used together with sort_by_length
+  packing_in_sequence: False
+
+# Path to the current checkpoint to continue training from
+# Enable this to the last checkpoint after the first run
+# (if it crashed and you want to resume)
+# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt
+ckpt_path: null
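To illustrate the packing alignment rules the config above describes, here is a small standalone check (not trainer code) of the two documented constraints: the default packing_batchsize has many batch-size divisors, and packing_min_ctx_len is a whole multiple of packing_chunksize.

    # Illustrative check of the packing alignment described in the config above.
    default_packing_batchsize = 8 * (3 * 4 * 5 * 6 * 7)   # = 20160, the documented default
    for batch_size in (8, 16, 24, 32, 48, 64, 96, 160):   # a few batch sizes that divide evenly
        assert default_packing_batchsize % batch_size == 0

    # packing_min_ctx_len must be a multiple of packing_chunksize;
    # with this config, a fully packed sample is 4 chunks of 4096 tokens.
    packing_chunksize, packing_min_ctx_len = 4096, 16384
    assert packing_min_ctx_len % packing_chunksize == 0
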
From c251e14c53d6dcd0b032239cfc4dc985ecd3c027 Mon Sep 17 00:00:00 2001
From: "Eugene Cheah (picocreator)" 
Date: Sun, 21 Jan 2024 01:56:12 +0000
Subject: [PATCH 08/23] benchmark file tweak

---
 .../{7B-sxm-benchmark.ipynb => 7B-8x80GB-benchmark.ipynb}   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
 rename notebook/trainer-v5-validation/{7B-sxm-benchmark.ipynb => 7B-8x80GB-benchmark.ipynb} (98%)

diff --git a/notebook/trainer-v5-validation/7B-sxm-benchmark.ipynb b/notebook/trainer-v5-validation/7B-8x80GB-benchmark.ipynb
similarity index 98%
rename from notebook/trainer-v5-validation/7B-sxm-benchmark.ipynb
rename to notebook/trainer-v5-validation/7B-8x80GB-benchmark.ipynb
index de8e75f0..f5ae0df9 100644
--- a/notebook/trainer-v5-validation/7B-sxm-benchmark.ipynb
+++ b/notebook/trainer-v5-validation/7B-8x80GB-benchmark.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 7B SXM based bencmark\n",
+    "# 7B based - 8x80GB VRAM benchmark\n",
     "\n",
     "The following is for benchmarking 7B training on 8 x 80GB vram based nvidia cards.\n",
     "With the following settings.\n",
@@ -22,7 +22,9 @@
     "| A100 PCIe | - | - |\n",
     "| H800 SXM* | 7 kT/s | - |\n",
     "\n",
-    "H800 is the \"china safe export\" edition of H100, with its numbers coming from the RWKV-LM repo (not infctx repo). Left here for reference."
+    "H800 is the \"china safe export\" edition of H100, with its numbers coming from the RWKV-LM repo (not infctx repo). Left here for reference.\n",
+    "\n",
+    "Blanks mean we didn't run them (yet?)."
   ]

From 01835f4fa6fb4dcf836d7619f4a3acc1f0e47382 Mon Sep 17 00:00:00 2001
From: "Eugene Cheah (picocreator)" 
Date: Sun, 21 Jan 2024 03:14:15 +0000
Subject: [PATCH 09/23] tweak

---
 notebook/trainer-v5-validation/7B-8x80GB-benchmark.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebook/trainer-v5-validation/7B-8x80GB-benchmark.ipynb b/notebook/trainer-v5-validation/7B-8x80GB-benchmark.ipynb
index f5ae0df9..3dfff17d 100644
--- a/notebook/trainer-v5-validation/7B-8x80GB-benchmark.ipynb
+++ b/notebook/trainer-v5-validation/7B-8x80GB-benchmark.ipynb
@@ -22,7 +22,7 @@
     "| A100 PCIe | - | - |\n",
     "| H800 SXM* | 7 kT/s | - |\n",
     "\n",
-    "H800 is the \"china safe export\" edition of H100, with its numbers coming from the RWKV-LM repo (not infctx repo). Left here for reference.\n",
+    "H800 is the \"china safe export\" edition of H100, with its numbers coming from the RWKV-LM repo, with different settings (not infctx repo). Left here for reference.\n",
     "\n",
     "Blanks mean we didn't run them (yet?)."
   ]
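To put the table's kT/s figures in rough context, here is an illustrative back-of-envelope estimate only; it assumes every packed 7B sample is about 16384 tokens (the packing_min_ctx_len) and takes the 18147-sample count from the dataset preload output earlier in this patch. None of this comes from the trainer itself.

    # Back-of-envelope epoch-time estimate (illustrative assumptions, see above).
    samples, tokens_per_sample = 18147, 16384       # preload output; pack-size assumption
    total_tokens = samples * tokens_per_sample      # ~297M tokens
    rate_tokens_per_sec = 7_000                     # the table's 7 kT/s H800 figure
    hours = total_tokens / rate_tokens_per_sec / 3600
    print(f"~{hours:.1f} hours for one pass at 7 kT/s")  # ~11.8 hours
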
]
 },

From b237fb36150bb3fb04cc2c59dba2a04631324b58 Mon Sep 17 00:00:00 2001
From: "Eugene Cheah (picocreator)" 
Date: Sun, 21 Jan 2024 04:58:49 +0000
Subject: [PATCH 10/23] WIP 1B5 baseline

---
 .../selective-loss/1B5-baseline-run-10k.ipynb | 172 ++++++++++
 .../config/1B5-enwiki_10k-world-packing.yaml  | 298 ++++++++++++++++++
 2 files changed, 470 insertions(+)
 create mode 100644 notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb
 create mode 100644 notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml

diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb
new file mode 100644
index 00000000..4982ca16
--- /dev/null
+++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb
@@ -0,0 +1,172 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1B5 Baseline Run\n",
+    "\n",
+    "Without any experimental tweaks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ENABLE_WANDB: True\n",
+      "GPU_DEVICES: auto\n",
+      "NOTEBOOK_DIR: /home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss\n",
+      "TRAINER_DIR: /home/recursal/RWKV-infctx-trainer/RWKV-v5\n",
+      "PROJECT_DIR: /home/recursal/RWKV-infctx-trainer\n"
+     ]
+    }
+   ],
+   "source": [
+    "GPU_DEVICES=\"auto\"\n",
+    "ENABLE_WANDB=True\n",
+    "WANDB_PREFIX=\"infctx-v5-selective-loss\"\n",
+    "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n",
+    "\n",
+    "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
+    "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
+    "\n",
+    "if ENABLE_WANDB:\n",
+    "    WANDB_MODE=\"online\"\n",
+    "else:\n",
+    "    WANDB_MODE=\"disabled\"\n",
+    "\n",
+    "# The model sizing\n",
+    "MODEL_NAME=\"RWKV-v5-1B5-world.bin\"\n",
+    "MODEL_URL=\"https://huggingface.co/BlinkDL/rwkv-5-world/resolve/main/RWKV-5-World-1B5-v2-20231025-ctx4096.pth?download=true\"\n",
+    "\n",
+    "# Computing the notebook, and various paths\n",
+    "import os\n",
+    "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
+    "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n",
+    "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
+    "\n",
+    "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
+    "print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
+    "print(\"PROJECT_DIR:\", PROJECT_DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "File ‘RWKV-v5-1B5-world.bin’ already there; not retrieving.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's wget the model files\n",
+    "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n",
+    "!cd \"{PROJECT_DIR}/model\" && \\\n",
+    "    wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Saving the dataset (1/1 shards): 100%|█| 646/646 [00:00<00:00, 7625.90 examples/\n",
+      "Saving the dataset (1/1 shards): 100%|█| 803/803 [00:00<00:00, 33706.89 examples\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's preload the required dataset\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python3 preload_datapath.py \"{NOTEBOOK_DIR}/config/1B5-enwiki_10k-world-packing.yaml\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Multi-epoch training"
+   ]
+  },
+  {
"cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-21 04:55:04,998] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=1024', '--trainer.devices=auto'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=1024', '--trainer.devices=auto'].\n", + "Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n",
+      "ninja: no work to do.\n",
+      "Loading extension module wkv5...\n",
+      "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n",
+      "---\n"
+     ]
+    }
+   ],
+   "source": [
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    python3 lightning_trainer.py fit \\\n",
+    "        -c \"{NOTEBOOK_DIR}/config/1B5-enwiki_10k-world-packing.yaml\" \\\n",
+    "        --model.load_model=\"../model/{MODEL_NAME}\" \\\n",
+    "        --data.skip_datapath_setup=True \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/selective-loss/baseline/\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - 1B5 - Baseline (packsize=16k, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.target_batch_size=128 \\\n",
+    "        --trainer.microbatch_size=8 \\\n",
+    "        --model.ctx_len=1024 \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "rwkv-infctx",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
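The 1B5 run above targets a batch of 128 with microbatch_size=8. As a rough cross-check against the multi-epoch output recorded later in this patch (646 packed training samples, 8 GPUs, "The number of training batches (11)"), the per-epoch batch count works out as follows; a sketch only, assuming samples are sharded evenly across the 8 devices the log reports.

    # Sanity check of the "11 training batches" figure in the run output
    # (assumption: 646 packed samples sharded across 8 GPUs, microbatch_size=8).
    import math
    train_samples, num_gpus, microbatch = 646, 8, 8
    per_gpu_samples = math.ceil(train_samples / num_gpus)       # ~81 samples per GPU
    batches_per_epoch = math.ceil(per_gpu_samples / microbatch) # -> 11
    print(batches_per_epoch)
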
diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml
new file mode 100644
index 00000000..828da740
--- /dev/null
+++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml
@@ -0,0 +1,298 @@
+# lightning.pytorch==2.0.2
+seed_everything: 3941088705
+trainer:
+
+  #
+  # Configure the DeepSpeed strategy. We recommend you start with `deepspeed_stage_2_offload`
+  # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful
+  # for training LoRA on large models on a single GPU.
+  #
+  # In general you would want to use the following:
+  #
+  # - deepspeed_stage_1 : Each of your GPUs has more than enough VRAM, and you do not need anything fancier
+  #
+  # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple GPUs each with sufficient VRAM
+  # - deepspeed_stage_2_offload : Reduce VRAM usage by offloading the optimizer state and work to the CPU
+  #
+  # - deepspeed_stage_3 : Split up the model across multiple GPUs, useful for large models, at a performance cost
+  # - deepspeed_stage_3_offload : Additional offloading, at an even greater performance cost
+  #
+  # For more details see:
+  # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2
+  #
+  strategy: deepspeed_stage_2
+
+  # Logger settings for wandb. If you want to enable wandb, uncomment the whole logger section
+  # ---
+  logger:
+    class_path: lightning.pytorch.loggers.WandbLogger
+    init_args:
+      name: 'infctx-v5-unit-test-baseline (train-ctx=1024, data-ctx=16k-packing)'
+      project: 'RWKV-X-SLoss'
+      tags: ['RWKV', 'infctx']
+
+  # Checkpoint settings for the training process
+  callbacks:
+    class_path: lightning.pytorch.callbacks.ModelCheckpoint
+    init_args:
+      # Configure this to the path you want to save your checkpoints to
+      # note that a subdir will be created with the name `epoch=x-step=y.ckpt`
+      #
+      # to convert a checkpoint to a model, you can use the
+      # `python3 export_checkpoint.py ` script,
+      # which will create a `rwkv_model.pth` in the checkpoint directory.
+      #
+      # Do not use the `zero_to_fp32.py` script as that will have export format issues
+      dirpath: ../checkpoint/selective-loss/baseline
+      filename: null
+
+      # Save the top/last K checkpoints
+      save_top_k: 3
+      # Choose by the most recent checkpoints (step based)
+      monitor: 'step'
+      mode: max
+
+      # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt'
+      # useful to simplify checkpoint resume scripts, at the price of some disk performance
+      save_last: false
+
+      # DO NOT set this as true, as the model weights exported will have format issues
+      # export as a checkpoint, and use the `export_checkpoint.py` script to convert it to a model instead
+      save_weights_only: false
+
+      # How frequently you want to save a checkpoint, in training steps.
+      # This will happen for every X data samples, where X = every_n_train_steps * accumulate_grad_batches
+      #
+      # In general you will want to avoid putting a low number (especially if accumulate_grad_batches <= 100)
+      # as the checkpoint process will pause all GPU training for some time, slowing down the overall run
+      # However you do not want to configure too high a number either, or you will lose too much progress if the training crashes
+      every_n_train_steps: null
+      every_n_epochs: 1
+      save_on_train_epoch_end: true
+      train_time_interval: null
+
+      # Other settings, you can probably leave alone
+      verbose: false
+      auto_insert_metric_name: true
+
+  ########################################
+  ## Training run parameter settings
+  ########################################
+
+  # Generally what you want to configure is the maximum number of epochs
+  # Leave it as -1, and it will keep going forever until interrupted
+  # Or set it as a number, and it will stop after that number of epochs
+  max_epochs: -1
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  max_time: null
+
+  # Number of data samples to train on for each step. A data sample is counted as
+  # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step"
+  #
+  # This decides the number of data samples to learn from together, before backpropagating
+  # any weight changes at the end of the batch.
+  #
+  # Recommended to be a big enough number (like 128/256) so that it prevents the training
+  # loss from fluctuating in the process. But not too big a number, where the increased
+  # GPU VRAM / offloaded RAM usage will cause the training to crash.
+  #
+  # You are also recommended to configure this to a large enough number to fully utilize
+  # your GPU processing time %, and avoid idle time for the GPU between batches
+  #
+  # This number is divided by the number of GPUs and nodes configured
+  # So if you have 4 GPUs, and 2 nodes, and this is configured as 128,
+  # each GPU will process 128/4/2 = 16 data samples per step, via accumulate_grad_batches
+  target_batch_size: 16
+
+########################################
+## Training model settings
+########################################
+model:
+  # Model to start the finetune/training process from
+  load_model: ../model/L24-D2048-world-v5base-init.pth
+
+  # Context length to use for the training process
+  # the larger the number (and batch size), the larger the VRAM usage
+  #
+  # Note that if the data sample context length is larger than the ctx_len
+  # its training process will be split into ctx_len-sized chunks.
+  #
+  # This allows the training of extremely large context lengths (eg. 100k),
+  # without eating up too much VRAM, by keeping the training context length
+  # to a reasonable number suitable to the current GPU setup
+  ctx_len: 2048
+
+  # Data samples will be cut down to the respective max ctx_len_cutoffs
+  # values if they are larger than ctx_len. If the data sample is larger than
+  # the largest len_cutoff, the remaining data will be discarded
+  ctx_len_cutoffs: []
+  # Experimental settings, number of tokens to skip in the data sample
+  # prefix, for the respective cutoff length. Used to speed up the process
+  ctx_len_warmup_steps: []
+
+  # Learning rate of the training process
+  # ---
+
+  # Initial learning rate of the process
+  lr_init: 1e-5
+  # Final learning rate after the learning rate period
+  # the learning rate will stay at the final value from then onwards
+  lr_final: 1e-5
+
+  # Number of epochs to reduce the learning rate from lr_init to lr_final
+  # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards)
+  # 0 means lr_final will apply immediately
+  # -1 means we take the current max_step / max_epoch as the period
+  lr_period: 1
+  # lr_period type if it is set, defaults to epoch
+  lr_period_type: epoch
+
+  # Adam optimizer settings
+  # You probably want to leave this alone, unless you know what you are doing
+  beta1: 0.9
+  beta2: 0.99
+  adam_eps: 1.0e-08
+  weight_decay: 0.01
+
+  # torch.set_float32_matmul_precision, used to optimize operations with tensor cores
+  # this should be set as null for GPUs without CUDA tensor cores
+  torch_set_float32_matmul_precision: 'high'
+  # torch_set_float32_matmul_precision: null
+
+  # Segment-based learning, used to work around training of large context lengths
+  # beyond what can be supported by the current GPU VRAM architecture
+  #
+  # This is not 1:1 equivalent to the same training process with the required VRAM,
+  # as the training process is split into multiple segments, part by part,
+  # with limited learning carried over from the previous segment.
+  bptt_learning: true
+
+  # Segment range to perform backprop learning on
+  # 1 means to apply only for the last segment
+  # -1 means to apply for all segments
+  bptt_learning_range: -1
+
+data:
+  # Skip the datapath setup
+  # ignored if using the preload_datapath.py
+  skip_datapath_setup: True
+
+  # dataset_path for the prebuilt dataset, using HF `load_from_disk()`
+  #
+  # Use this if you have built your own dataset and saved it with `save_to_disk()`
+  # with source left as null. Otherwise configure this to a directory in which the
+  # dataset will be built and tokenized by the huggingface dataset process.
+  data_path: ../datapath/enwiki_10k-world-packing/
+
+  # Otherwise provide the source path, which is used as the huggingface dataset path
+  # this will be used to populate the dataset_path
+  #
+  # Use either the following
+  # - hugging face dataset
+  # - Directory path to a directory containing dataset files
+  # - Path to a single dataset file
+  # - hugging face dataset mode (ie: text,csv,etc - use data_dir to configure the path then)
+  # - null
+  #
+  # If source is disabled, all other params, except data_path, are ignored
+  source: "teven/enwiki_10k"
+  # source: text
+  # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt
+
+  # Use data_dir, if you are using source=text/json/etc
+  # this should be relative to the trainer script path
+  source_data_dir: null
+
+  # After loading the dataset, split out test data used for unit-tests.
+  # This process is skipped if the dataset includes a test split
+  # This process is skipped if set to zero
+  test_split: 0.05
+  test_split_shuffle: true
+
+  # Tokenizer to use: either the inbuilt 'neox' or 'world' tokenizer
+  # If using a custom tokenizer, provide the tokenizer file path
+  # ---
+  tokenizer: world
+
+  # Minimum / Maximum token size of the data samples to use
+  # useful for filtering out small noisy data samples from large datasets
+  # (eg. removal of small articles of less than 512 tokens from wikipedia)
+  #
+  # This is ignored, if set to -1
+  min_token_size: 64
+  max_token_size: 8192
+
+  # Rechunking of a text dataset, this is done only when source is set as 'text'
+  # and will merge the various sentences into larger chunks, up to the target size
+  #
+  # Defaults to 4096
+  #
+  # This is ignored, if source is not set as text
+  # This is ignored, if set to zero
+  # ---
+  # text_rechunk_size: 4096
+
+  # Apply text rechunk to the dataset, even if it is not a 'text' source
+  # This is done only after dataset filtering, and if source is not 'text'
+  # ---
+  # text_rechunk_force: true
+
+  # Custom text column to use, useful for datasets with alternative training column labels
+  # This is checked before multi column merging, default is null (disabled)
+  # eg: 'code'
+  # ---
+  # custom_text_key: 'code'
+
+  # Multi Column merging process, default setting is used to support and merge
+  # "instruction", "input", "output" datasets. To disable, set multi_column_keys to []
+  #
+  # A minimum of 2 columns is required, with non-empty data, for the merge to occur
+  # If no match is found, this will fall back to the default prompt/completion or text column,
+  # or throw an error if the default fallback is not found
+  # ---
+  # multi_column_keys: ['instruction', 'input', 'output']
+  # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n']
+  # multi_column_train_mask: [true, false, true]
+  # multi_column_separator: '\n\n'
+
+  # If processing prompt/completion jsonl pairs, the prompt is masked by default
+  # use this flag to disable this default behaviour
+  # ---
+  # disable_prompt_completion_mask: false
+
+  # ----------------------------
+  # Dataset packing support
+  # Recommended for finetuning with mixed document sizes
+  # For training a foundation model "from scratch", rechunking is typically used instead
+  # ----------------------------
+
+  # Boolean flag to enable / disable dataset packing
+  packing_enable: True
+
+  # Used to ensure all training samples within this batch size are the same length
+  # Ideally this should align exactly with your real "batch size"
+  #
+  # Uses `8 * (3 * 4 * 5 * 6 * 7) = 20160` by default, as it should align across
+  # a large number of batch size combinations. This helps reduce the number of
+  # misaligned batches, and thus the amount of wasted training time.
+  packing_batchsize: 128
+
+  # Chunking size to align within each batch, this ideally should be equal to
+  # the training context length used.
+  packing_chunksize: 2048
+
+  # Minimum size to pack up to, this should be a multiple of packing_chunksize
+  # defaults to -1, which equals packing_chunksize
+  packing_min_ctx_len: 8192
+
+  # Pack the data sequentially if possible, in accordance with the dataset sequence
+  # this can be used together with sort_by_length
+  packing_in_sequence: False
+
+# Path to the current checkpoint to continue training from
+# Enable this to the last checkpoint after the first run
+# (if it crashed and you want to resume)
+# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt
+ckpt_path: null
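As a rough illustration of the dataset filtering and split the config above describes (not the trainer's actual preload code): the sketch below uses the Hugging Face datasets API against the same `teven/enwiki_10k` source. The `"text"` column name and the whitespace word count standing in for the world tokenizer's token count are assumptions for the example.

    # Illustrative sketch of min/max_token_size filtering plus the 0.05 test split.
    from datasets import load_dataset

    ds = load_dataset("teven/enwiki_10k", split="train")
    # Stand-in length check; the real pipeline filters on world-tokenizer token counts.
    ds = ds.filter(lambda ex: 64 <= len(ex["text"].split()) <= 8192)
    split = ds.train_test_split(test_size=0.05, shuffle=True, seed=3941088705)
    train_ds, test_ds = split["train"], split["test"]
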
From 9438607fe10a47556038810f6634aeb4e05d4436 Mon Sep 17 00:00:00 2001
From: "Eugene Cheah (picocreator)" 
Date: Sun, 21 Jan 2024 05:48:37 +0000
Subject: [PATCH 11/23] WIP 1B5 baseline and sloss runs

---
 .../selective-loss/1B5-baseline-run-10k.ipynb | 341 +++++++++++-
 .../selective-loss/1B5-sloss-run-10k.ipynb    | 507 ++++++++++++++++++
 .../config/1B5-enwiki_10k-world-packing.yaml  |   6 +-
 .../1B5-sloss-enwiki_10k-world-packing.yaml   | 319 +++++++++++
 4 files changed, 1167 insertions(+), 6 deletions(-)
 create mode 100644 notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-sloss-run-10k.ipynb
 create mode 100644 notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_10k-world-packing.yaml

diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb
index 4982ca16..61bd7601 100644
--- a/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb
+++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb
@@ -104,14 +104,14 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 36,
+  "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "[2024-01-21 04:55:04,998] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+     "[2024-01-21 05:28:21,163] [INFO] 
[real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=1024', '--trainer.devices=auto'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=1024', '--trainer.devices=auto'].\n", "Seed set to 3941088705\n", @@ -127,7 +127,342 @@ "ninja: no work to do.\n", "Loading extension module wkv5...\n", "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n" + "---\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 128\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 2\n", + " - effective_batch_size: 128\n", + "\n", + "[rank: 0] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-21 05:28:38,693] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 05:28:38,760] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 05:28:38,764] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 05:28:38,765] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 05:28:38,766] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 05:28:38,772] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 05:28:38,915] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV 
infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[rank: 4] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 6] Seed set to 3941088705\n", + "[rank: 1] Seed set to 3941088705\n", + "[rank: 7] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 2] Seed set to 3941088705\n", + "[rank: 5] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 3] Seed set to 3941088705\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 5] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 6] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 1] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 3] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 7] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 2] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 4] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240121_052914-a4wk0ytz\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33minfctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss/runs/a4wk0ytz\u001b[0m\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:639: Checkpoint directory /home/recursal/RWKV-infctx-trainer/checkpoint/selective-loss/baseline exists and is not empty.\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 1.000e-04 (0.0001)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across 
multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.05791044235229492 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10227656364440918 seconds\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.1022181510925293 seconds\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10222673416137695 seconds\n", + "Time to load fused_adam op: 0.1024785041809082 seconds\n", + "Time to load fused_adam op: 0.10240483283996582 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10320234298706055 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10330080986022949 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 11/11 [03:10<00:00, 0.06it/s, v_num=0ytz, train/loss=2.120]\n", + "Validation: | | 0/? [00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 1.000e-04 (0.0001)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited 
bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.05791044235229492 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10227656364440918 seconds\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.1022181510925293 seconds\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10222673416137695 seconds\n", + "Time to load fused_adam op: 0.1024785041809082 seconds\n", + "Time to load fused_adam op: 0.10240483283996582 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10320234298706055 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10330080986022949 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 11/11 [03:10<00:00, 0.06it/s, v_num=0ytz, train/loss=2.120]\n", + "Validation: | | 0/? [00:00` script, + # which will create a `rwkv_model.pth` in the checkpoint directory. + # + # Do not use the `zero_to_fp32.py` script as that will have export format issues + dirpath: ../checkpoint/selective-loss/baseline + filename: null + + # Save the top/last K checkpoints + save_top_k: 3 + # Choose by the most recent checkpoints (step based) + monitor: 'step' + mode: max + + # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt' + # useful to simplify checkpoint resume scripts, at a price of disk performance + save_last: false + + # DO NOT set this as true, as the model weight exported will have format issues + # export as checkpoint, and use the `export_checkpoint.py` script to convert to a model instead + save_weights_only: false + + # How frequently you want to save a checkpoint, in steps. + # This will happen for every X data samples, where X = every_n_train_steps * accumulate_grad_batches + # + # In general you will want to avoid putting a low number (especially if accumulate_grad_batches <= 100) + # as the checkpoint process will pause all the gpu training for some time, slowing down the overall process + # However you do not want to configure too high of a number either, or you will lose too much progress if the training crashes + every_n_train_steps: null + every_n_epochs: 1 + save_on_train_epoch_end: true + train_time_interval: null + + # Other settings, you can probably leave alone + verbose: false + auto_insert_metric_name: true + + ######################################## + ## Training run parameter settings + ######################################## + + # Generally what you want to configure is the maximum number of epochs + # Leave it as -1, and it will keep going forever till interrupted + # Or set it as a number, and it will stop after that number of epochs + max_epochs: -1 + min_epochs: null + max_steps: -1 + min_steps: null + max_time: null + + # Number of datasamples to train for each step, a data sample is considered + # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step" + # + # This decides the number of datasamples to learn together from, before backpropagating + # any weight changes at the end of the batch. + # + # Recommended to be a big enough number (like 128/256) where it prevents the training + # loss from fluctuating in the process. But not too big of a number where the increased + # GPU vRAM / offloaded RAM usage will cause the training to crash. + # + # You are also recommended to configure this to a large enough number to fully utilize + # your GPU processing time %, and avoid idle time for the GPU between batches + # + # This number is divided by the number of GPUs, and nodes configured + # So if you have 4 GPUs, and 2 nodes, and this is configured as 128 + # Each GPU will process 128/4/2 = 16 datasamples per step, via accumulate_grad_batches + target_batch_size: 16
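As a quick sanity check of the target_batch_size arithmetic described in the comments above, here is a minimal Python sketch. It is illustrative only; the GPU, node, and microbatch values are hypothetical examples taken from the comment's own 4-GPU / 2-node scenario, not read from this config.

# Minimal sketch of the target_batch_size arithmetic (hypothetical values).
gpus, nodes = 4, 2                      # hardware layout from the comment's example
target_batch_size = 128                 # total datasamples learnt together per step
per_gpu = target_batch_size // (gpus * nodes)
assert per_gpu == 16                    # matches the 128/4/2 = 16 example above
# Assuming a microbatch_size of 8 per forward pass, gradients would be
# accumulated over per_gpu // microbatch_size forward passes.
microbatch_size = 8
accumulate_grad_batches = per_gpu // microbatch_size
assert accumulate_grad_batches == 2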
+######################################## +## Training model settings +######################################## +model: + # Model to start the finetune/training process from + load_model: ../model/L24-D2048-world-v5base-init.pth + + # Context length to use for the training process + # the larger the number (and batch size), the larger the vram usage + # + # Note that if the datasample context length is larger than the ctx_len, + # its training process would be split into ctx_len sized chunks. + # + # This allows the training of extremely large context lengths (eg. 100k), + # without eating up too much vram, by keeping the training context length + # to a reasonable number suitable for the current GPU setup + ctx_len: 2048 + + # Data samples would be cut down to the respective max ctx_len_cutoffs + # values if they are larger than ctx_len. If the data sample is larger than + # the largest len_cutoff, the remaining data will be discarded + ctx_len_cutoffs: [] + # Experimental settings, number of tokens to skip in the data sample + # prefix, for the respective cutoff length. Used to speed up the process + ctx_len_warmup_steps: [] + + # Learning rate of the training process + # --- + + # Initial learning rate of the process + lr_init: 1e-4 + # Final learning rate after the learning rate period + # learning rate will stay at the final value from then onwards + lr_final: 1e-4 + + # Number of epochs to reduce the learning rate from lr_init to lr_final + # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards) + # 0 means lr_final will apply immediately + # -1 means we take the current max_step / max_epoch as the period + lr_period: 1 + # lr_period type, if it is set; defaults to epoch + lr_period_type: epoch + + # Adam optimizer settings + # You probably want to leave this alone, unless you know what you are doing + beta1: 0.9 + beta2: 0.99 + adam_eps: 1.0e-08 + weight_decay: 0.01 + + # torch.set_float32_matmul_precision, used to optimize operations with tensor cores + # this should be set to null for GPUs without cuda tensor cores + torch_set_float32_matmul_precision: 'high' + # torch_set_float32_matmul_precision: null + + # Segment-based learning, used to work around the training of large context lengths + # beyond what can be supported by the current GPU vram + # + # This is not 1:1 equivalent to the same training process with the required vram, + # as the training process is split into multiple segments, part by part, + # with limited learning carried over from the previous segment. + bptt_learning: true + + # Segment range to perform backprop learning on + # 1 means to apply only for the last segment + # -1 means to apply for all segments + bptt_learning_range: -1
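To make the segmented (BPTT) training described above concrete, here is a minimal sketch. It is not taken from this repository's code: the model's (logits, state) call signature and the list-of-tensors state are assumptions for illustration. It shows the bptt_learning_range=-1 behaviour, where every ctx_len sized segment gets its own backward pass while the recurrent state is carried (detached) across segment boundaries.

import torch
import torch.nn.functional as F

def segmented_train_step(model, optimizer, tokens, ctx_len=2048):
    # Hypothetical sketch of bptt_learning with bptt_learning_range=-1:
    # each ctx_len sized segment is backpropped on its own, while the
    # recurrent state flows forward (detached) so the autograd graph
    # never spans the full sequence.
    state = None
    optimizer.zero_grad()
    for start in range(0, tokens.size(1) - 1, ctx_len):
        seg = tokens[:, start : start + ctx_len + 1]
        logits, state = model(seg[:, :-1], state)   # assumed (logits, state) API
        loss = F.cross_entropy(
            logits.reshape(-1, logits.size(-1)),
            seg[:, 1:].reshape(-1),
        )
        loss.backward()                             # backprop for this segment only
        state = [s.detach() for s in state]         # truncate BPTT at the boundary
    optimizer.step()

Detaching the carried state is what keeps memory bounded; it is also why the comments above note that this is not 1:1 equivalent to full-context training.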
+ + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Skip tokens that are already learnt, and are below the target loss threshold + token_loss_threshold: 1.0 + # Perform token-based dropout at random, at the target % rate + token_dropout_rate: 0.1
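The two settings above are the heart of the selective-loss experiment. As a rough illustration only (the trainer's actual implementation lives in RWKV-v5/src/model.py and may differ), token_loss_threshold drops already-learnt tokens from the training loss, and token_dropout_rate randomly masks a further fraction:

import torch

def selective_training_loss(token_losses, train_mask,
                            token_loss_threshold=1.0, token_dropout_rate=0.1):
    # Rough illustration of the two settings above (not the repo's exact code).
    # token_losses: per-token cross entropy, shape (batch, seq_len)
    # train_mask:   1.0 for tokens that would normally be trained on
    keep = train_mask.clone()
    # Skip tokens that are already learnt (loss below the threshold)
    keep = keep * (token_losses >= token_loss_threshold).float()
    # Randomly drop a further fraction of tokens from the loss
    keep = keep * (torch.rand_like(token_losses) >= token_dropout_rate).float()
    # Average only over the tokens that were kept
    return (token_losses * keep).sum() / keep.sum().clamp(min=1)

This split is also why a later commit in this patch returns both a sampling_loss (over all tokens) and a training_loss (over the kept tokens) from checkpointed_step, logging the former for validation.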
+data: + # Skip the datapath setup + # ignored if using the preload_datapath.py + skip_datapath_setup: True + + # dataset_path for the prebuilt dataset, using HF `load_from_disk()` + # + # Use this if you have built your own dataset and saved it with `save_to_disk()` + # with source left as null. Otherwise configure this to a directory in which the + # dataset will be built and tokenized by the huggingface dataset process. + data_path: ../datapath/enwiki_10k-world-packing/ + + # Otherwise provide the source path, which is used as the huggingface dataset path + # this will be used to populate the dataset_path + # + # Use either of the following + # - hugging face dataset + # - Directory path to a directory containing dataset files + # - Path to a single dataset file + # - hugging face dataset mode (ie: text, csv, etc - then use data_dir to configure the path) + # - null + # + # If source is disabled, all other params, except data_path, are ignored + source: "teven/enwiki_10k" + # source: text + # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt + + # Use data_dir, if you are using source=text/json/etc + # this should be relative to the trainer script path + source_data_dir: null + + # After loading the dataset, split out test data used for unit-testing + # This process is skipped if the dataset includes a test split + # This process is skipped if set to zero + test_split: 0.1 + test_split_shuffle: true + + # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer + # If using a custom tokenizer, provide the tokenizer file path + # --- + tokenizer: world + + # Minimum / Maximum token size of the dataset to use + # useful for filtering out small noisy data samples from large datasets + # (eg. removal of small articles of less than 512 tokens from wikipedia) + # + # This is ignored, if set to -1 + min_token_size: 64 + max_token_size: 8192 + + # Rechunking of text dataset, this is done only when source is set as 'text' + # and will merge the various sentences into larger chunks up to the target size + # + # Defaults to 4096 + # + # This is ignored, if source is not set as text + # This is ignored, if set to zero + # --- + # text_rechunk_size: 4096 + + # Apply text rechunk to the dataset, even if its not a 'text' source + # This is done only after dataset filtering, and if source is not 'text' + # --- + # text_rechunk_force: true + + # Custom text column to use, useful for datasets with alternative training column labels + # This is checked before multi column merging, default is null (disabled) + # eg: 'code' + # --- + # custom_text_key: 'code' + + # Multi Column merging process, default setting is used to support and merge + # "instruction", "input", "output" datasets. To disable, set multi_column_keys to [] + # + # A minimum of 2 columns is required, with non-empty data, for the merge to occur + # If no match is found, this will fall back to the default prompt/completion or text column, + # or throw an error if the default fallback is not found + # --- + # multi_column_keys: ['instruction', 'input', 'output'] + # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n'] + # multi_column_train_mask: [true, false, true] + # multi_column_separator: '\n\n' + + # If processing prompt/completion jsonl pairs, the prompt is masked by default + # use this flag to disable this default behaviour + # --- + # disable_prompt_completion_mask: false + + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Prefix token masking + # + # The rationale behind this is that the first X tokens of any new training record + # should not be "backpropped", as it is unfair to expect the model (or a human) + # to make any reasonable guess at that stage. As such, this is used to "mask" the + # first X tokens from the loss calculation, so they are not backpropped. + data_prefix_skip_mask: 8 + + # ---------------------------- + # Dataset packing support + # Recommended to be used when finetuning with mixed document sizes + # For foundation model "from scratch" training, rechunking is typically used instead + # ---------------------------- + + # Boolean flag to enable / disable dataset packing + packing_enable: True + + # Used to ensure all training samples within this batch size are the same length + # Ideally this should align exactly with your real "batch size" + # + # Uses `8 * (3 * 4 * 5 * 6 * 7) = 20160` by default, as it aligns across + # a large number of batch size combinations. This helps reduce the number of + # misaligned batches, and thus the amount of wasted training time. + packing_batchsize: 128 + + # Chunking size to align within each batch, this ideally should be equal to + # the training context length used. + packing_chunksize: 2048 + + # Minimum size to pack up to, this should be a multiple of packing_chunksize + # defaults to -1, which equals packing_chunksize + packing_min_ctx_len: 8192 + + # Pack the data sequentially if possible, in accordance with the dataset sequence + # this can be used together with sort_by_length + packing_in_sequence: False
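A minimal sketch of the greedy packing idea the settings above describe (illustrative only; the trainer's actual packing code lives in the repo's data pipeline and may differ, and pad_id=0 is an assumed pad token). Short documents are concatenated into shared rows of up to packing_min_ctx_len tokens, and each row is padded to a multiple of packing_chunksize so it chunks evenly at train time:

def pack_samples(samples, packing_chunksize=2048, packing_min_ctx_len=8192, pad_id=0):
    # Illustrative greedy packing (not the repo's exact implementation):
    # concatenate tokenized samples into shared rows, flushing a row once
    # the next sample would push it past packing_min_ctx_len.
    rows, row = [], []
    for tokens in samples:
        if row and len(row) + len(tokens) > packing_min_ctx_len:
            rows.append(row)
            row = []
        row.extend(tokens)
    if row:
        rows.append(row)
    # Pad every row up to a multiple of packing_chunksize
    for row in rows:
        row.extend([pad_id] * ((-len(row)) % packing_chunksize))
    return rows

Packing short documents into shared rows avoids the padding waste that mixed-size finetuning batches would otherwise incur.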
+# Path to the current checkpoint to continue training from +# Set this to the last checkpoint after the first run +# (if it crashes and you want to resume) +# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt +ckpt_path: null From 86b3aea99a50446e12b3073f7419dd48316a426f Mon Sep 17 00:00:00 2001 From: "Eugene Cheah (picocreator)" Date: Sun, 21 Jan 2024 07:35:33 +0000 Subject: [PATCH 12/23] WIP experiments --- .../selective-loss/1B5-baseline-run-10k.ipynb | 225 +++++++++++++++++- .../selective-loss/1B5-sloss-run-10k.ipynb | 6 +- .../1B5-sloss-enwiki_10k-world-packing.yaml | 4 +- .../7B-8x80GB-benchmark.ipynb | 2 +- 4 files changed, 229 insertions(+), 8 deletions(-) diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb index 61bd7601..6583b6a7 100644 --- a/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb @@ -462,7 +462,228 @@ " warnings.warn(\n", "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "Epoch 1: 45%|▍| 5/11 [01:46<02:07, 0.05it/s, v_num=0ytz, train/loss=2.310, val" + "Epoch 1: 100%|█| 11/11 [03:34<00:00, 0.05it/s, v_num=0ytz, train/loss=2.190, va\n", + "Validation: | | 0/?
[00:00 Date: Sun, 21 Jan 2024 07:43:00 +0000 Subject: [PATCH 13/23] Fixing validation loss code --- RWKV-v5/src/model.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/RWKV-v5/src/model.py b/RWKV-v5/src/model.py index db95a2ce..e16dbfd5 100644 --- a/RWKV-v5/src/model.py +++ b/RWKV-v5/src/model.py @@ -1239,7 +1239,7 @@ def checkpointed_step(idx, targets, mask, last_shift_states, # Throw if total loss is NaN assert not torch.isnan(training_loss), "training_loss is NaN" - return training_loss + return sampling_loss, training_loss # # Training and validation steps @@ -1249,9 +1249,9 @@ def training_step(self, batch, batch_idx): # print("=== BATCH ID SHAPE ===", batch["input_ids"].shape) # print("=== BATCH AM SHAPE ===", batch["attention_mask"].shape) - total_loss = self.compute_loss(batch, batch_idx, True) + sampling_loss, training_loss = self.compute_loss(batch, batch_idx, True) - self.log('train/loss', total_loss, prog_bar=True) + self.log('train/loss', training_loss, prog_bar=True) # If set - forces the above train/loss log line to always be on a new line if self.substep_logging: print("") @@ -1261,21 +1261,21 @@ def training_step(self, batch, batch_idx): torch.cuda.empty_cache() # if loss not a number return None - if torch.isnan(total_loss): + if torch.isnan(training_loss): return None - return total_loss + return training_loss @TCompileBaseline def validation_step(self, batch, batch_idx): - total_loss = self.compute_loss(batch, batch_idx, False) - self.log('validation/loss', total_loss, prog_bar=True, sync_dist=True) + sampling_loss, training_loss = self.compute_loss(batch, batch_idx, False) + self.log('validation/loss', sampling_loss, prog_bar=True, sync_dist=True) # Reset the token tracking accordingly self._counting_tokens = 0 self._counting_time_start = time.time() - return total_loss + return sampling_loss ### --- # SimpleRWKV, a wrapper for RWKV that allows for simple usage of the model From 3516b7414b6e8854fefef21128dc100411a6a3f1 Mon Sep 17 00:00:00 2001 From: "Eugene Cheah (picocreator)" Date: Sun, 21 Jan 2024 20:51:57 +0000 Subject: [PATCH 14/23] 1B5 - enwiki / sloss runs --- .../selective-loss/1B5-baseline-run-10k.ipynb | 24708 +++++++++++++++- .../selective-loss/1B5-sloss-run-10k.ipynb | 19725 +++++++++++- .../config/1B5-enwiki_10k-world-packing.yaml | 2 +- .../1B5-sloss-enwiki_10k-world-packing.yaml | 4 +- 4 files changed, 43724 insertions(+), 715 deletions(-) diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb index 6583b6a7..4e2f6a99 100644 --- a/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-10k.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -20,9 +20,9 @@ "text": [ "ENABLE_WANDB: True\n", "GPU_DEVICES: auto\n", - "NOTEBOOK_DIR: /home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss\n", - "TRAINER_DIR: /home/recursal/RWKV-infctx-trainer/RWKV-v5\n", - "PROJECT_DIR: /home/recursal/RWKV-infctx-trainer\n" + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" ] } ], @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -77,15 +77,15 
@@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Saving the dataset (1/1 shards): 100%|█| 646/646 [00:00<00:00, 7625.90 examples/\n", - "Saving the dataset (1/1 shards): 100%|█| 803/803 [00:00<00:00, 33706.89 examples\n" + "Saving the dataset (1/1 shards): 100%|█| 613/613 [00:00<00:00, 11158.16 examples\n", + "Saving the dataset (1/1 shards): 100%|█| 803/803 [00:00<00:00, 28408.98 examples\n" ] } ], @@ -104,24 +104,24 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[2024-01-21 05:28:21,163] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=1024', '--trainer.devices=auto'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=1024', '--trainer.devices=auto'].\n", + "[2024-01-21 07:59:19,967] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. 
To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=2048', '--trainer.devices=auto'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=2048', '--trainer.devices=auto'].\n", "Seed set to 3941088705\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", "Building extension module wkv5...\n", "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", "ninja: no work to do.\n", @@ -144,61 +144,52 @@ "\n", "[rank: 0] Seed set to 3941088705\n", "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", - "[2024-01-21 05:28:38,693] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-21 05:28:38,760] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-21 05:28:38,764] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-21 05:28:38,765] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-21 05:28:38,766] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-21 05:28:38,772] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-21 05:28:38,915] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[rank: 4] Seed set to 3941088705\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "[rank: 6] Seed set to 3941088705\n", + "[2024-01-21 07:59:41,746] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,749] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,756] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,757] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,758] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,759] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,759] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", "[rank: 1] Seed set to 3941088705\n", - "[rank: 7] Seed set to 3941088705\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "[rank: 6] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. 
It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "[rank: 3] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "[rank: 2] Seed set to 3941088705\n", "[rank: 5] Seed set to 3941088705\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "[rank: 2] Seed set to 3941088705\n", + "[rank: 4] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "[rank: 3] Seed set to 3941088705\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 7] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Building extension module wkv5...\n", "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "ninja: no work to do.\n", @@ -211,51 +202,68 @@ "Loading extension module wkv5...\n", "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", "---\n", - "Loading extension module wkv5...\n", - "Loading extension module wkv5...\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", "---\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Building extension module wkv5...\n", "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", "ninja: no work to do.\n", "Loading extension module wkv5...\n", "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", "---\n", - "[rank: 5] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", - "[rank: 6] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", "[rank: 1] Seed set to 3941088705\n", "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", - "[rank: 3] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", - "[rank: 7] Seed set to 3941088705\n", - "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", "[rank: 2] Seed set to 3941088705\n", "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 3] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 6] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", "[rank: 4] Seed set to 3941088705\n", "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 5] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 7] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240121_052914-a4wk0ytz\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240121_080019-vs7gwnia\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33minfctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss/runs/a4wk0ytz\u001b[0m\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:639: Checkpoint directory /home/recursal/RWKV-infctx-trainer/checkpoint/selective-loss/baseline exists and is not empty.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss/runs/vs7gwnia\u001b[0m\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", "#\n", "# RWKV lighting_trainer.py important notes \n", "# https://github.com/RWKV/RWKV-infctx-trainer \n", @@ -263,70 +271,61 @@ "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", "# - When resuming from checkpoint, the estimated time is inaccurate\n", - "#LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", "\n", - "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", "\n", "[RWKV.model] Configuring optimizer with\n", " - lr_init: 1.000e-04 (0.0001)\n", " - lr_final: 1.000e-04 (0.0001)\n", - "\n", - "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - 
"LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", "Building extension module fused_adam...\n", "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", "ninja: no work to do.\n", "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.05791044235229492 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.08217287063598633 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", "Loading extension module fused_adam...\n", "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.10227656364440918 seconds\n", "Loading extension module fused_adam...\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.1022181510925293 seconds\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10234260559082031 seconds\n", "Loading extension module fused_adam...\n", "Loading extension module fused_adam...\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.10222673416137695 seconds\n", - "Time to load fused_adam op: 0.1024785041809082 seconds\n", - "Time to load fused_adam op: 0.10240483283996582 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.10240006446838379 seconds\n", + "Time to load fused_adam op: 0.10246396064758301 seconds\n", + "Time to load fused_adam op: 0.10230851173400879 seconds\n", + "Time to load fused_adam op: 0.10246992111206055 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.10248088836669922 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Loading extension module fused_adam...\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10320234298706055 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.10278725624084473 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10330080986022949 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", "\n", " | Name | Type | Params\n", @@ -340,373 +339,24150 @@ "0 Non-trainable params\n", "1.6 B Total params\n", "6,311.018 Total estimated model params size (MB)\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", - "Epoch 0: 100%|████| 11/11 [03:10<00:00, 0.06it/s, v_num=0ytz, train/loss=2.120]\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (10) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 10/10 [02:30<00:00, 0.07it/s, v_num=wnia, train/loss=2.220]\n", "Validation: | | 0/? 
[00:00=12.1), as this is known to have freeze issues\n", "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", "# - When resuming from checkpoint, the estimated time is inaccurate\n", - "#LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", "\n", - "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", "\n", "[RWKV.model] Configuring optimizer with\n", " - lr_init: 1.000e-04 (0.0001)\n", " - lr_final: 1.000e-04 (0.0001)\n", "\n", - "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as 
PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", "Building extension module fused_adam...\n", "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "ninja: no work to do.\n", "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.05791044235229492 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.08153295516967773 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", "Loading extension module fused_adam...\n", "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.10227656364440918 seconds\n", "Loading extension module fused_adam...\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.1022181510925293 seconds\n", "Loading extension module fused_adam...\n", "Loading extension module fused_adam...\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.10222673416137695 seconds\n", - "Time to load fused_adam op: 0.1024785041809082 seconds\n", - "Time to load fused_adam op: 0.10240483283996582 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.10219526290893555 seconds\n", + "Time to load fused_adam op: 0.10224127769470215 seconds\n", + "Time to load fused_adam op: 0.10227823257446289 seconds\n", + "Time to load fused_adam op: 0.10226273536682129 seconds\n", + "Time to load fused_adam op: 0.10217475891113281 seconds\n", + "Time to load fused_adam op: 0.10245966911315918 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", "Loading extension module fused_adam...\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10320234298706055 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10330080986022949 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.10364770889282227 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", "\n", " | Name | Type | Params\n", @@ -340,152 +329,19396 @@ "0 Non-trainable params\n", "1.6 B Total params\n", "6,311.018 Total estimated model params size (MB)\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", - "Epoch 0: 100%|████| 11/11 [03:10<00:00, 0.06it/s, v_num=0ytz, train/loss=2.120]\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (10) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 10/10 [02:52<00:00, 0.06it/s, v_num=xvdu, train/loss=2.050]\n", "Validation: | | 0/? 
[00:00 Date: Sun, 21 Jan 2024 21:28:52 +0000 Subject: [PATCH 15/23] 100k run --- .../1B5-baseline-run-100k.ipynb | 24504 ++++++++++++++++ .../selective-loss/1B5-sloss-run-100k.ipynb | 19740 +++++++++++++ .../config/1B5-enwiki_100k-world-packing.yaml | 298 + .../1B5-sloss-enwiki_100k-world-packing.yaml | 319 + 4 files changed, 44861 insertions(+) create mode 100644 notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-100k.ipynb create mode 100644 notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-sloss-run-100k.ipynb create mode 100644 notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_100k-world-packing.yaml create mode 100644 notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_100k-world-packing.yaml diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-100k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-100k.ipynb new file mode 100644 index 00000000..c2714362 --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-100k.ipynb @@ -0,0 +1,24504 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1B5 Baseline Run\n", + "\n", + "Without any experimental tweaks" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "WANDB_PREFIX=\"infctx-v5-selective-loss\"\n", + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# The model sizing\n", + "MODEL_NAME=\"RWKV-v5-1B5-world.bin\"\n", + "MODEL_URL=\"https://huggingface.co/BlinkDL/rwkv-5-world/resolve/main/RWKV-5-World-1B5-v2-20231025-ctx4096.pth?download=true\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File ‘RWKV-v5-1B5-world.bin’ already there; not retrieving.\n" + ] + } + ], + "source": [ + "# Lets wget the model files\n", + "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n", + "!cd \"{PROJECT_DIR}/model\" && \\\n", + " wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving the dataset (1/1 shards): 100%|█| 613/613 [00:00<00:00, 11158.16 examples\n", + "Saving the dataset (1/1 shards): 100%|█| 803/803 [00:00<00:00, 28408.98 examples\n" + ] + } + ], + "source": [ + "# Lets preload the requried dataset \n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py 
\"{NOTEBOOK_DIR}/config/1B5-enwiki_100k-world-packing.yaml\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multi-epoch training" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-21 07:59:19,967] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=2048', '--trainer.devices=auto'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=2048', '--trainer.devices=auto'].\n", + "Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 128\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 2\n", + " - effective_batch_size: 128\n", + "\n", + "[rank: 0] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-21 07:59:41,746] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,749] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,756] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,757] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,758] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,759] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,759] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 1] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 6] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 3] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 5] Seed set to 3941088705\n", + "[rank: 2] Seed set to 3941088705\n", + "[rank: 4] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 7] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 1] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 2] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 3] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 6] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 4] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 5] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 7] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240121_080019-vs7gwnia\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33minfctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss/runs/vs7gwnia\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 1.000e-04 (0.0001)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty 
with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.08217287063598633 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10234260559082031 seconds\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10240006446838379 seconds\n", + "Time to load fused_adam op: 0.10246396064758301 seconds\n", + "Time to load fused_adam op: 0.10230851173400879 seconds\n", + "Time to load fused_adam op: 0.10246992111206055 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10248088836669922 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10278725624084473 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (10) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 10/10 [02:30<00:00, 0.07it/s, v_num=wnia, train/loss=2.220]\n", + "Validation: | | 0/? 
[00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 1.000e-04 (0.0001)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.08153295516967773 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10219526290893555 seconds\n", + "Time to load fused_adam op: 0.10224127769470215 seconds\n", + "Time to load fused_adam op: 0.10227823257446289 seconds\n", + "Time to load fused_adam op: 0.10226273536682129 seconds\n", + "Time to load fused_adam op: 0.10217475891113281 seconds\n", + "Time to load fused_adam op: 0.10245966911315918 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10364770889282227 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (10) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 10/10 [02:52<00:00, 0.06it/s, v_num=xvdu, train/loss=2.050]\n", + "Validation: | | 0/? 
[00:00` script, + # which will create a `rwkv_model.pth` in the checkpoint directory. + # + # Do not use the `zero_to_fp32.py` script as that will have export format issues + dirpath: ../checkpoint/selective-loss/baseline + filename: null + + # Save the top/last K checkpoints + save_top_k: 1 + # Choose by the most recent checkpoints (step based) + monitor: 'step' + mode: max + + # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt' + # useful to simply checkpoint resume scripts, at a price of disk performance + save_last: false + + # DO NOT set this as true, as the model weight exported will have format issues + # expert as checkpoint, and use the `export_checkpoint.py` script to convert to model instead + save_weights_only: false + + # How frequent you want to save a checkpoint for every step. + # This will happen for every X data sample, where X = every_n_train_steps * accumulate_grad_batches + # + # In general you will want to avoid putting a low number (expecially if accumulate_grad_batches <= 100) + # as the checkpoint process, will pause all the gpu training for some time, slowing down the overall process + # However you do not want to configure too high of a number, where you will lose too much progress if the training crashes + every_n_train_steps: null + every_n_epochs: 1 + save_on_train_epoch_end: true + train_time_interval: null + + # Other settings, you can probably leave alone + verbose: false + auto_insert_metric_name: true + + ######################################## + ## Training run parameter settings + ######################################## + + # Generally what you want to configure is the maximum number of epochs + # Leave it as -1, and it will keep going forever till interrupted + # Or set it as a number, and it will stop after that number of epochs + max_epochs: -1 + min_epochs: null + max_steps: -1 + min_steps: null + max_time: null + + # Number of datasamples to train for each step, a data sample is considered + # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step" + # + # This decides the number of datasample, to learn together from, before backproping + # any weight changes at the end of the batch. + # + # Recommended to be a big enough number (like 128/256) where it prevents the training + # loss from flucuating in the process. But not too big of a number where the increased + # GPU vRAM / offloaded RAM usage will cause the training to crash. + # + # You are also recommended to configure this to a large enough number to fully utilize + # your GPU processing time %, and avoid idle time for the GPU between batches + # + # This number is divided by the number of GPUs, and nodes configured + # So if you have 4 GPUs, and 2 nodes, and this is configured as 128 + # Each GPU will process 128/4/2 = 16 datasamples per step, via accumulate_grad_batches + target_batch_size: 16 + +######################################## +## Training model settings +######################################## +model: + # Model to start the finetune/training process from + load_model: ../model/L24-D2048-world-v5base-init.pth + + # Context length to use for the training process + # the larger the number (and batch size) the larger the vram usage + # + # Note that if the datasample context length is larger then the ctx_len + # its training process would be split into ctx_len sized chunks. + # + # This allows the training of extreamly large context length (eg. 
100k),
+  # without eating up too much vram by keeping the training context length
+  # to a reasonable number suitable to the current GPU setup
+  ctx_len: 2048
+
+  # Data samples would be cut down to the respective max ctx_len_cutoffs
+  # values if they are larger than ctx_len. If the data sample is larger than
+  # the largest len_cutoff, the remaining data will be discarded
+  ctx_len_cutoffs: []
+  # Experimental settings, number of tokens to skip in the data sample
+  # prefix, for the respective cutoff length. Used to speed up the process
+  ctx_len_warmup_steps: []
+
+  # Learning rate of the training process
+  # ---
+
+  # Initial learning rate of the process
+  lr_init: 1e-4
+  # Final learning rate after the learning rate period
+  # learning rate will stay at final value from then onwards
+  lr_final: 1e-4
+
+  # Number of epochs to reduce the learning rate from lr_init to lr_final
+  # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards)
+  # 0 means lr_final will apply immediately
+  # -1 means we take the current max_step / max_epoch as the period
+  lr_period: 1
+  # lr_period type, if it's set, defaults to epoch
+  lr_period_type: epoch
+
+  # Adam optimizer settings
+  # You probably want to leave this alone, unless you know what you are doing
+  beta1: 0.9
+  beta2: 0.99
+  adam_eps: 1.0e-08
+  weight_decay: 0.01
+
+  # torch.set_float32_matmul_precision, used to optimize operations with tensor cores
+  # this should be set as null for non-cuda-core GPUs
+  torch_set_float32_matmul_precision: 'high'
+  # torch_set_float32_matmul_precision: null
+
+  # Segment-based learning, used to work around training of large context lengths
+  # beyond what can be supported by the current GPU vram architecture
+  #
+  # This is not 1:1 equivalent to the same training process with the required vram,
+  # as the training process is split into multiple segments, part by part,
+  # with limited learning carried over from the previous segment.
+  bptt_learning: true
+
+  # Segmented range to perform backprop learning on
+  # 1 means to apply only for the last segment
+  # -1 means to apply for all segments
+  bptt_learning_range: -1
+
+data:
+  # Skip the datapath setup
+  # ignored if using the preload_datapath.py
+  skip_datapath_setup: True
+
+  # dataset_path for the prebuilt dataset, using HF `load_from_disk()`
+  #
+  # Use this if you have built your own dataset and saved it with `save_to_disk()`
+  # with source left as null. Otherwise configure this to a directory in which the
+  # dataset will be built and tokenized by the huggingface dataset process.
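+  #
+  # As a rough illustrative sketch (not part of this repo's scripts), such a
+  # prebuilt dataset directory could be produced with the HF datasets library,
+  # then pointed to via data_path below:
+  #
+  #   from datasets import load_dataset
+  #   ds = load_dataset("teven/enwiki_100k")      # any HF dataset source
+  #   ds.save_to_disk("../datapath/my-prebuilt/") # illustrative path
+  #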
+  data_path: ../datapath/enwiki_100k-world-packing/
+
+  # Otherwise provide the source path, which is used as the huggingface dataset path
+  # this will be used to populate the dataset_path
+  #
+  # Use either the following
+  # - hugging face dataset
+  # - Directory path to a directory containing dataset files
+  # - Path to a single dataset file
+  # - hugging face dataset mode (ie: text, csv, etc - then use data_dir to configure the path)
+  # - null
+  #
+  # If source is disabled, all other params, except data_path, are ignored
+  source: "teven/enwiki_100k"
+  # source: text
+  # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt
+
+  # Use data_dir, if you are using source=text/json/etc
+  # this should be relative to the trainer script path
+  source_data_dir: null
+
+  # After loading the dataset, split out test data used for unit-testing
+  # This process is skipped if the dataset includes a test split
+  # This process is skipped if set to zero
+  test_split: 0.05
+  test_split_shuffle: true
+
+  # Tokenizer to use, use either the inbuilt 'neox' or 'world' tokenizer
+  # If using a custom tokenizer, provide the tokenizer file path
+  # ---
+  tokenizer: world
+
+  # Minimum / Maximum token size of the dataset to use
+  # useful for filtering out small noisy data samples from large datasets
+  # (eg. removal of small articles of less than 512 tokens from wikipedia)
+  #
+  # This is ignored if set to -1
+  min_token_size: 1024
+  max_token_size: 8192
+
+  # Rechunking of text dataset, this is done only when source is set as 'text'
+  # and will merge the various sentences into larger chunks up to the target size
+  #
+  # Defaults to 4096
+  #
+  # This is ignored if source is not set as text
+  # This is ignored if set to zero
+  # ---
+  # text_rechunk_size: 4096
+
+  # Apply text rechunk to the dataset, even if it's not a 'text' source
+  # This is done only after dataset filtering, and if source is not 'text'
+  # ---
+  # text_rechunk_force: true
+
+  # Custom text column to use, useful for datasets with alternative training column labels
+  # This is checked before multi column merging, default is null (disabled)
+  # eg: 'code'
+  # ---
+  # custom_text_key: 'code'
+
+  # Multi Column merging process, default setting is used to support and merge
+  # "instruction", "input", "output" datasets. To disable, set multi_column_keys to []
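+  #
+  # For example (illustrative record, merge shown roughly), with the default
+  # prefixes and separator below, a row such as
+  #   {"instruction": "Summarize", "input": "<text>", "output": "<summary>"}
+  # would be merged into:
+  #   Instruction:\nSummarize\n\nInput:\n<text>\n\nOutput:\n<summary>
+  # with multi_column_train_mask deciding which columns contribute to the loss
+  #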
+  #
+  # A minimum of 2 columns is required, with non-empty data, for the merge to occur
+  # If no match is found, this will fall back to the default prompt/completion or text column,
+  # or throw an error if the default fallback is not found
+  # ---
+  # multi_column_keys: ['instruction', 'input', 'output']
+  # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n']
+  # multi_column_train_mask: [true, false, true]
+  # multi_column_separator: '\n\n'
+
+  # If processing prompt/completion jsonl pairs, the prompt is masked by default
+  # use this flag to disable this default behaviour
+  # ---
+  # disable_prompt_completion_mask: false
+
+  # ----------------------------
+  # Dataset packing support
+  # Recommended when finetuning on documents of mixed sizes
+  # For foundation model "from scratch" training, rechunking is typically used instead
+  # ----------------------------
+
+  # Boolean flag to enable / disable dataset packing
+  packing_enable: True
+
+  # Used to ensure all training samples within this batch size are the same length
+  # Ideally this should align exactly with your real "batch size"
+  #
+  # Uses `8 * (3 * 4 * 5 * 6 * 7) = 20160` by default, as it aligns across
+  # a large number of batch size combinations. This helps reduce the amount of
+  # misaligned batches, and thus reduce the amount of wasted training time.
+  packing_batchsize: 128
+
+  # Chunking size to align within each batch, this ideally should be equal to
+  # the training context length used.
+  packing_chunksize: 2048
+
+  # Minimum size to pack up to, this should be a multiple of packing_chunksize
+  # defaults to -1, which equals packing_chunksize
+  packing_min_ctx_len: 8192
+
+  # Pack the data sequentially if possible, in accordance with the dataset sequence
+  # this can be used together with sort_by_length
+  packing_in_sequence: False
+
+# Path to the current checkpoint to continue training from
+# Enable this to the last checkpoint after the first run
+# (if it crashes and you want to resume)
+# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt
+ckpt_path: null
diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_100k-world-packing.yaml b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_100k-world-packing.yaml
new file mode 100644
index 00000000..6bbe3add
--- /dev/null
+++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_100k-world-packing.yaml
@@ -0,0 +1,319 @@
+# lightning.pytorch==2.0.2
+seed_everything: 3941088705
+trainer:
+
+  #
+  # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload`
+  # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful
+  # for training LoRA on large models on a single GPU.
+  #
+  # In general you would want to use the following:
+  #
+  # - deepspeed_stage_1 : Each of your GPUs has too much vram, and you do not know what to do
+  #
+  # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple gpus each with sufficient vram
+  # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu
+  #
+  # - deepspeed_stage_3 : Split up the model across multiple gpus, useful for large models, at a performance cost
+  # - deepspeed_stage_3_offload : Additional offloading, for even greater performance cost
+  #
+  # For more details see:
+  # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2
+  #
+  strategy: deepspeed_stage_2
+
+  # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section
+  # ---
+  logger:
+    class_path: lightning.pytorch.loggers.WandbLogger
+    init_args:
+      name: 'infctx-v5-unit-test-baseline (train-ctx=1024, data-ctx=16k-packing)'
+      project: 'RWKV-X-SLoss'
+      tags: ['RWKV', 'infctx']
+
+  # Checkpoint settings for the training process
+  callbacks:
+    class_path: lightning.pytorch.callbacks.ModelCheckpoint
+    init_args:
+      # Configure this to the path you want to save your checkpoints to
+      # note that a subdir will be created with the name `epoch=x-step=y.ckpt`
+      #
+      # to convert a checkpoint to a model, you can use the
+      # `python3 export_checkpoint.py <checkpoint path>` script,
+      # which will create a `rwkv_model.pth` in the checkpoint directory.
+      #
+      # Do not use the `zero_to_fp32.py` script as that will have export format issues
+      dirpath: ../checkpoint/selective-loss/sloss
+      filename: null
+
+      # Save the top/last K checkpoints
+      save_top_k: 1
+      # Choose by the most recent checkpoints (step based)
+      monitor: 'step'
+      mode: max
+
+      # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt'
+      # useful to simplify checkpoint resume scripts, at a price of disk performance
+      save_last: false
+
+      # DO NOT set this as true, as the model weight exported will have format issues
+      # export as checkpoint, and use the `export_checkpoint.py` script to convert to a model instead
+      save_weights_only: false
+
+      # How frequently you want to save a checkpoint for every step.
+      # This will happen for every X data samples, where X = every_n_train_steps * accumulate_grad_batches
+      #
+      # In general you will want to avoid putting a low number (especially if accumulate_grad_batches <= 100)
+      # as the checkpoint process will pause all GPU training for some time, slowing down the overall process
+      # However you do not want to configure too high a number, or you will lose too much progress if the training crashes
+      every_n_train_steps: null
+      every_n_epochs: 1
+      save_on_train_epoch_end: true
+      train_time_interval: null
+
+      # Other settings, you can probably leave alone
+      verbose: false
+      auto_insert_metric_name: true
+
+  ########################################
+  ## Training run parameter settings
+  ########################################
+
+  # Generally what you want to configure is the maximum number of epochs
+  # Leave it as -1, and it will keep going forever till interrupted
+  # Or set it as a number, and it will stop after that number of epochs
+  max_epochs: -1
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  max_time: null
+
+  # Number of data samples to train on for each step; a data sample is considered
+  # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step"
+  #
+  # This decides the number of data samples to learn from together, before backpropagating
+  # any weight changes at the end of the batch.
+  #
+  # Recommended to be a big enough number (like 128/256) such that it prevents the training
+  # loss from fluctuating in the process. But not too big a number, where the increased
+  # GPU vRAM / offloaded RAM usage will cause the training to crash.
+  #
+  # You are also recommended to configure this to a large enough number to fully utilize
+  # your GPU processing time %, and avoid idle time for the GPU between batches
+  #
+  # This number is divided by the number of GPUs and nodes configured
+  # So if you have 4 GPUs, and 2 nodes, and this is configured as 128
+  # Each GPU will process 128/4/2 = 16 data samples per step, via accumulate_grad_batches
+  target_batch_size: 16
+
+########################################
+## Training model settings
+########################################
+model:
+  # Model to start the finetune/training process from
+  load_model: ../model/L24-D2048-world-v5base-init.pth
+
+  # Context length to use for the training process
+  # the larger the number (and batch size) the larger the vram usage
+  #
+  # Note that if the datasample context length is larger than the ctx_len
+  # its training process would be split into ctx_len sized chunks.
+  #
+  # This allows the training of extremely large context lengths (eg. 100k),
+  # without eating up too much vram, by keeping the training context length
+  # to a reasonable number suitable to the current GPU setup
+  ctx_len: 2048
+
+  # Data samples would be cut down to the respective max ctx_len_cutoffs
+  # values if they are larger than ctx_len. If the data sample is larger than
+  # the largest len_cutoff, the remaining data will be discarded
+  ctx_len_cutoffs: []
+  # Experimental settings, number of tokens to skip in the data sample
+  # prefix, for the respective cutoff length. Used to speed up the process.
+  ctx_len_warmup_steps: []
+
+  # Learning rate of the training process
+  # ---
+
+  # Initial learning rate of the process
+  lr_init: 1e-4
+  # Final learning rate after the learning rate period
+  # learning rate will stay at the final value from then onwards
+  lr_final: 1e-4
+
+  # Number of epochs to reduce the learning rate from lr_init to lr_final
+  # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards)
+  # 0 means lr_final will apply immediately
+  # -1 means we take the current max_step / max_epoch as the period
+  lr_period: 1
+  # lr_period type, if set; defaults to epoch
+  lr_period_type: epoch
+
+  # Adam optimizer settings
+  # You probably want to leave this alone, unless you know what you are doing
+  beta1: 0.9
+  beta2: 0.99
+  adam_eps: 1.0e-08
+  weight_decay: 0.01
+
+  # torch.set_float32_matmul_precision, used to optimize operations with tensor cores
+  # this should be set as null, for non-cuda-core GPUs
+  torch_set_float32_matmul_precision: 'high'
+  # torch_set_float32_matmul_precision: null
+
+  # Segment-based learning, used to work around training of large context lengths
+  # beyond what can be supported by the current GPU vram architecture
+  #
+  # This is not a 1:1 equivalent of the same training process done with the full vram requirement,
+  # as the training process is split into multiple segments, part by part,
+  # with limited learning carried over from the previous segment.
+  bptt_learning: true
+
+  # Segment range to perform backprop learning on
+  # 1 means to apply only for the last segment
+  # -1 means to apply for all segments
+  bptt_learning_range: -1
+
+  # ----------------------------
+  # Selective loss training
+  # ----------------------------
+
+  # Skip tokens that are already learnt, and are below the target threshold
+  token_loss_threshold: 1.0
+  # Perform token-based dropout at random, at the target % rate
+  token_dropout_rate: 0.05
+
+data:
+  # Skip the datapath setup
+  # ignored if using the preload_datapath.py script
+  skip_datapath_setup: True
+
+  # dataset_path for the prebuilt dataset, using HF `load_from_disk()`
+  #
+  # Use this if you have built your own dataset and saved it with `save_to_disk()`
+  # with source left as null. Otherwise configure this to a directory in which the
+  # dataset will be built and tokenized by the huggingface dataset process.
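+  #
+  # As a rough illustrative sketch (not part of this config, and skipping the
+  # tokenization step that the actual dataset build performs), such a prebuilt
+  # dataset could be produced ahead of time in python with the huggingface
+  # `datasets` library, reusing the source / data_path values from this file:
+  #
+  #   from datasets import load_dataset
+  #   ds = load_dataset("teven/enwiki_100k")
+  #   ds.save_to_disk("../datapath/enwiki_100k-world-sloss-packing/")
+  #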
+  data_path: ../datapath/enwiki_100k-world-sloss-packing/
+
+  # Otherwise provide the source path, which is used as the huggingface dataset path
+  # this will be used to populate the dataset_path
+  #
+  # Use either of the following
+  # - hugging face dataset
+  # - Directory path to a directory containing dataset files
+  # - Path to a single dataset file
+  # - hugging face dataset mode (ie: text,csv,etc - use data_dir to configure the path then)
+  # - null
+  #
+  # If source is disabled, all other params, except data_path, are ignored
+  source: "teven/enwiki_100k"
+  # source: text
+  # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt
+
+  # Use data_dir, if you are using source=text/json/etc
+  # this should be relative to the trainer script path
+  source_data_dir: null
+
+  # After loading the dataset, split out test data used for unit-testing
+  # This process is skipped if the dataset includes a test split
+  # This process is skipped if set to zero
+  test_split: 0.05
+  test_split_shuffle: true
+
+  # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer
+  # If using a custom tokenizer, provide the tokenizer file path
+  # ---
+  tokenizer: world
+
+  # Minimum / Maximum token size of the dataset to use
+  # useful for filtering out small noisy data samples from large datasets
+  # (eg. removal of small articles of fewer than 512 tokens from wikipedia)
+  #
+  # This is ignored, if set to -1
+  min_token_size: 1024
+  max_token_size: 8192
+
+  # Rechunking of the text dataset, this is done only when source is set as 'text'
+  # and will merge the various sentences into larger chunks, up to the target size
+  #
+  # Defaults to 4096
+  #
+  # This is ignored, if source is not set as text
+  # This is ignored, if set to zero
+  # ---
+  # text_rechunk_size: 4096
+
+  # Apply text rechunk to the dataset, even if its not a 'text' source
+  # This is done only after dataset filtering, and if source is not 'text'
+  # ---
+  # text_rechunk_force: true
+
+  # Custom text column to use, useful for datasets with alternative training column labels
+  # This is checked before multi column merging, default is null (disabled)
+  # eg: 'code'
+  # ---
+  # custom_text_key: 'code'
+
+  # Multi Column merging process, default setting is used to support and merge
+  # "instruction", "input", "output", datasets. To disable, set multi_column_keys to []
+  #
+  # A minimum of 2 columns is required, with non-empty data, for the merge to occur
+  # If no match is found, this will fall back to the default prompt/completion or text column,
+  # or throw an error if the default fallback is not found
+  # ---
+  # multi_column_keys: ['instruction', 'input', 'output']
+  # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n']
+  # multi_column_train_mask: [true, false, true]
+  # multi_column_separator: '\n\n'
+
+  # If processing prompt/completion jsonl pairs, the prompt is masked by default
+  # use this flag to disable this default behaviour
+  # ---
+  # disable_prompt_completion_mask: false
+
+  # ----------------------------
+  # Selective loss training
+  # ----------------------------
+
+  # Prefix token masking
+  #
+  # The rationale behind this is that the first X tokens should not be "backpropped"
+  # for any new training record, as it is unfair to expect the model (or a human) to make
+  # any reasonable guesses at that stage. As such, this is used to "mask" the first X tokens
+  # from the loss calculation, so they are not backpropped.
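+  #
+  # Worked example (illustrative numbers only): with data_prefix_skip_mask: 8,
+  # a 512 token sample contributes loss only from tokens 9..512 - the loss on
+  # the predictions for the first 8 tokens is masked out entirely.
+  #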
+  data_prefix_skip_mask: 8
+
+  # ----------------------------
+  # Dataset packing support
+  # Recommended for finetuning on documents of mixed sizes
+  # For foundation model "from scratch" training, rechunking is typically used instead
+  # ----------------------------
+
+  # Boolean flag to enable / disable dataset packing
+  packing_enable: True
+
+  # Used to ensure all training samples within this batch size are the same length
+  # Ideally this should align exactly with your real "batch size"
+  #
+  # Uses `8 * (3 * 4 * 5 * 6 * 7) = 20160` by default, as it aligns across
+  # a large number of batch size combinations. This helps reduce the number of
+  # misaligned batches, and thus the amount of wasted training time.
+  packing_batchsize: 128
+
+  # Chunking size to align within each batch, this ideally should be equal to
+  # the training context length used.
+  packing_chunksize: 2048
+
+  # Minimum size to pack up to, this should be a multiple of packing_chunksize
+  # defaults to -1, which equals packing_chunksize
+  packing_min_ctx_len: 8192
+
+  # Pack the data sequentially if possible, in accordance with the dataset sequence
+  # this can be used together with sort_by_length
+  packing_in_sequence: False
+
+# Path to the current checkpoint to continue training from
+# Set this to the last checkpoint after the first run
+# (if it crashed and you want to resume)
+# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt
+ckpt_path: null

From 463b5eecf570584e28f039dca983ec4be60b96e3 Mon Sep 17 00:00:00 2001
From: "Eugene Cheah (picocreator)"
Date: Mon, 22 Jan 2024 01:16:25 +0000
Subject: [PATCH 16/23] fixing spikes in token/s tracking

---
 RWKV-v5/src/model.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/RWKV-v5/src/model.py b/RWKV-v5/src/model.py
index e16dbfd5..7c88a1a1 100644
--- a/RWKV-v5/src/model.py
+++ b/RWKV-v5/src/model.py
@@ -1214,7 +1214,7 @@ def checkpointed_step(idx, targets, mask, last_shift_states,
             self._counting_tokens += batch_ctx_len / 1000.0

             # Log the line values
-            wandb.log({
+            log_line = dict({
                 # The original loss and ctx_len (averaged by batch size)
                 'train/ctx_len': batch_ctx_len / microbatch_size,
                 'train/data_loss': sampling_loss,
@@ -1223,9 +1223,6 @@ def checkpointed_step(idx, targets, mask, last_shift_states,
                 'train/tokens': training_tokens / microbatch_size,
                 'train/loss': training_loss,

-                # Perf tracking
-                f'perf/kTokens_per_sec.gpu.{global_rank}': self._counting_tokens / max(time.time() - self._counting_time_start, 1),
-
                 # This was disabled, cause it was confusing as it restarts every epoch
                 # f'perf/kTokens_total.gpu.{global_rank}': self._counting_tokens,
@@ -1237,6 +1234,12 @@ def checkpointed_step(idx, targets, mask, last_shift_states,
                 'batchidx': batch_idx
             })

+            # Perf tracking, only if more than 30 seconds have passed
+            time_taken = time.time() - self._counting_time_start
+            if time_taken > 30.0:
+                log_line[f'perf/kTokens_per_sec.gpu.{global_rank}'] = self._counting_tokens / time_taken
+            wandb.log(log_line)
+
             # Throw if total loss is NaN
             assert not torch.isnan(training_loss), "training_loss is NaN"
             return sampling_loss, training_loss

From 5e9802f1562c752c532361b5e47bbc4a3006dd97 Mon Sep 17 00:00:00 2001
From: "Eugene Cheah (picocreator)"
Date: Mon, 22 Jan 2024 01:40:48 +0000
Subject: [PATCH 17/23] reverted how KTokens are measured (the newer graph is weirder)

---
 RWKV-v5/src/model.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/RWKV-v5/src/model.py b/RWKV-v5/src/model.py
index 7c88a1a1..e16dbfd5 100644
--- a/RWKV-v5/src/model.py
+++ b/RWKV-v5/src/model.py
@@ -1214,7 +1214,7 @@ def checkpointed_step(idx, targets, mask, last_shift_states,
             self._counting_tokens += batch_ctx_len / 1000.0

             # Log the line values
-            log_line = dict({
+            wandb.log({
                 # The original loss and ctx_len (averaged by batch size)
                 'train/ctx_len': batch_ctx_len / microbatch_size,
                 'train/data_loss': sampling_loss,
@@ -1223,6 +1223,9 @@ def checkpointed_step(idx, targets, mask, last_shift_states,
                 'train/tokens': training_tokens / microbatch_size,
                 'train/loss': training_loss,

+                # Perf tracking
+                f'perf/kTokens_per_sec.gpu.{global_rank}': self._counting_tokens / max(time.time() - self._counting_time_start, 1),
+
                 # This was disabled, cause it was confusing as it restarts every epoch
                 # f'perf/kTokens_total.gpu.{global_rank}': self._counting_tokens,
@@ -1234,12 +1237,6 @@ def checkpointed_step(idx, targets, mask, last_shift_states,
                 'batchidx': batch_idx
             })

-            # Perf tracking, only if more than 30 seconds have passed
-            time_taken = time.time() - self._counting_time_start
-            if time_taken > 30.0:
-                log_line[f'perf/kTokens_per_sec.gpu.{global_rank}'] = self._counting_tokens / time_taken
-            wandb.log(log_line)
-
             # Throw if total loss is NaN
             assert not torch.isnan(training_loss), "training_loss is NaN"
             return sampling_loss, training_loss

From 695e4c260456ab3b42164950ea19563acb4ed42f Mon Sep 17 00:00:00 2001
From: "Eugene Cheah (picocreator)"
Date: Mon, 22 Jan 2024 22:58:30 +0000
Subject: [PATCH 18/23] WIP 1B5 run

---
 RWKV-v5/config-example.yaml                   |   10 +
 RWKV-v5/src/data.py                           |   16 +
 .../memory-test/World-1B5-mem-finetune.ipynb  | 4677 +++++++++++++++++
 .../memory-test/memory_script/.gitignore      |    4 +
 .../memory_script/eval_v5_memory_guided.py    |  370 ++
 .../v5-exp/memory-test/stage-1-tune.yaml      |  410 ++
 6 files changed, 5487 insertions(+)
 create mode 100644 notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb
 create mode 100644 notebook/rwkv-x-exp/v5-exp/memory-test/memory_script/.gitignore
 create mode 100644 notebook/rwkv-x-exp/v5-exp/memory-test/memory_script/eval_v5_memory_guided.py
 create mode 100644 notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml

diff --git a/RWKV-v5/config-example.yaml b/RWKV-v5/config-example.yaml
index ca2564fd..28577f14 100644
--- a/RWKV-v5/config-example.yaml
+++ b/RWKV-v5/config-example.yaml
@@ -511,6 +511,16 @@ data:
   # this can be used together with sort_by_length, otherwise a shuffle will be done
   packing_in_sequence: False

+  # ----------------------------
+  # Special use case flags
+  # ----------------------------
+
+  # Reverse the training dataset order before saving, this is useful for
+  # optimizing the dataset packing process, when using packing_in_sequence
+  # and sort_by_length desc order together
+  reverse_train_dataset_before_save: False
+
+
 # Path to the current checkpoint to continue training from
 # this should be the directory path, and ends with `.ckpt/`
 ckpt_path: null
diff --git a/RWKV-v5/src/data.py b/RWKV-v5/src/data.py
index ddf2c6df..7c75d65c 100644
--- a/RWKV-v5/src/data.py
+++ b/RWKV-v5/src/data.py
@@ -797,6 +797,13 @@ def merge_into_existing_samples(i):
         # Get the subset of the dataset
         src_dataset["train"] = src_dataset["train"].select(range(offset_val, offset_val + length_val))

+    # Dataset flipping (if needed)
+    if kargs["reverse_train_dataset_before_save"]:
+        train_dataset = src_dataset["train"]
+        def reverse_dataset(x, idx):
+            return train_dataset[train_dataset.num_rows - idx - 1]
+        src_dataset["train"] = src_dataset["train"].map(reverse_dataset, with_indices=True, num_proc=num_cpus)
src_dataset["train"].map(reverse_dataset, with_indices=True, num_proc=num_cpus) + # Save the dataset to disk src_dataset.save_to_disk(kargs["data_path"]) @@ -961,6 +968,15 @@ def __init__( # this can be used together with sort_by_length, otherwise a shuffle will be done packing_in_sequence: bool = False, + # ---------------------------- + # Specal use caes flags + # ---------------------------- + + # Reverse the training dataset order before saving, this is useful for, + # optimizing dataset packing process, when using packing_in_sequence + # and sort_by_length desc order together + reverse_train_dataset_before_save: bool = False, + # ---------------------------- # System tweaks # ---------------------------- diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb new file mode 100644 index 00000000..1420c5e2 --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb @@ -0,0 +1,4677 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RWKV World Memory Finetune (Memory Finetune)\n", + "\n", + "This takes an existing RWKV world model, and finetune them specifically for the memory repeat task of various sizes.\n", + "This test is used as an approximation of testing the model token memory size in the \"worse case scenerio\"\n", + "\n", + "- Using randomized data, so prior learning does not help, nor is it possible to compress the data\n", + "- Using a variety of token lengths, to avoid overfitting to a single length\n", + "- Based on the pretrained model (rwkv world)\n", + "- This process does \"destroy the model\" but it helps quantify the model limits\n", + "\n", + "In practise however, the model may show \"attention range\" longer then what is benchmarked, as natural text is highly compressible. 
+    "\n",
+    "This runner has been optimized to run on 8 x 24GB vram nodes; you should allocate at least 500GB of disk space.\n",
+    "\n",
+    "> This project assumes you have the rwkv-infctx conda env setup, and you are executing in that environment - see the main README.md for the conda env setup steps"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Configure your environment settings\n",
+    "(!Important: you will need to rerun the below cell, if you restart your kernel)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DEEPSPEED_STRAT: deepspeed_stage_1\n",
+      "ENABLE_WANDB: True\n",
+      "GPU_DEVICES: auto\n",
+      "NOTEBOOK_DIR: /home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test\n",
+      "TRAINER_DIR: /home/recursal/RWKV-infctx-trainer/RWKV-v5\n",
+      "PROJECT_DIR: /home/recursal/RWKV-infctx-trainer\n"
+     ]
+    }
+   ],
+   "source": [
+    "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n",
+    "GPU_DEVICES=\"auto\"\n",
+    "ENABLE_WANDB=True\n",
+    "WANDB_PREFIX=\"[8x4090] RWKV-v5-1B5-World\"\n",
+    "\n",
+    "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n",
+    "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
+    "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
+    "\n",
+    "if ENABLE_WANDB:\n",
+    "    WANDB_MODE=\"online\"\n",
+    "else:\n",
+    "    WANDB_MODE=\"disabled\"\n",
+    "\n",
+    "# The model sizing\n",
+    "MODEL_NAME=\"RWKV-v5-1B5-world.pth\"\n",
+    "MODEL_URL=\"https://huggingface.co/BlinkDL/rwkv-5-world/resolve/main/RWKV-5-World-1B5-v2-20231025-ctx4096.pth?download=true\"\n",
+    "\n",
+    "# Computing the notebook, and various paths\n",
+    "import os\n",
+    "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
+    "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n",
+    "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
+    "MEMORY_SCRIPT_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./notebook/util-scripts/memory_script\"))\n",
+    "\n",
+    "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
+    "print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
+    "print(\"PROJECT_DIR:\", PROJECT_DIR)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Download the pretrained model\n",
+    "(if you want to skip the basemodel train + instruct tune)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "File ‘RWKV-v5-1B5-world.pth’ already there; not retrieving.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Lets wget the model files\n",
+    "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n",
+    "!cd \"{PROJECT_DIR}/model\" && \\\n",
+    " wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Finetune 1 : Dataset preparation\n",
+    "\n",
+    "Stage 1 handles a total context size of 2048, meaning it will be tuned for memory tasks of roughly 1 to 1024 tokens in size."
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: rwkv in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (0.8.22)\n", + "Requirement already satisfied: asyncio in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (3.4.3)\n", + "Requirement already satisfied: aiocsv in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (1.2.5)\n", + "Requirement already satisfied: aiofiles in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (23.2.1)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from rwkv) (0.15.0)\n", + "Requirement already satisfied: huggingface_hub<1.0,>=0.16.4 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from tokenizers>=0.13.2->rwkv) (0.20.2)\n", + "Requirement already satisfied: filelock in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (3.13.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (2023.10.0)\n", + "Requirement already satisfied: requests in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (4.66.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (6.0.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (4.9.0)\n", + "Requirement already satisfied: packaging>=20.9 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (23.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from requests->huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from requests->huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from requests->huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from requests->huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (2023.11.17)\n" + ] + } + ], + "source": [ + "# Folder and eval pip setup\n", + "!cp -r \"{MEMORY_SCRIPT_DIR}/\" \"{NOTEBOOK_DIR}/\"\n", + "!python3 -m pip install rwkv asyncio aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n", + "Generated JSONL file with - 5 max words, 500 samples - at ./dataset/gen-word-5-count.jsonl\n", + "Generated JSONL file with - 15 max words, 500 samples - at ./dataset/gen-word-15-count.jsonl\n", + "Generated JSONL file with - 2 max words, 300 samples - at ./dataset/word-2-count.jsonl\n", + "Generated JSONL file with - 10 max words, 500 samples - at ./dataset/gen-word-10-count.jsonl\n", + "Generated JSONL file with - 25 max words, 500 samples - at ./dataset/gen-word-25-count.jsonl\n", + "Generated JSONL file with - 35 max words, 500 samples - at ./dataset/gen-word-35-count.jsonl\n", + "Generated JSONL file with - 50 max words, 500 samples - at ./dataset/gen-word-50-count.jsonl\n", + "Generated JSONL file with - 125 max words, 125 samples - at ./dataset/gen-word-125-count.jsonl\n", + "Generated JSONL file with - 140 max words, 125 samples - at ./dataset/gen-word-140-count.jsonl\n", + "Generated JSONL file with - 65 max words, 500 samples - at ./dataset/gen-word-65-count.jsonl\n", + "Generated JSONL file with - 20 max words, 500 samples - at ./dataset/gen-word-20-count.jsonl\n", + "Generated JSONL file with - 105 max words, 125 samples - at ./dataset/gen-word-105-count.jsonl\n", + "Generated JSONL file with - 180 max words, 125 samples - at ./dataset/gen-word-180-count.jsonl\n", + "Generated JSONL file with - 4 max words, 1000 samples - at ./dataset/word-2-count.jsonl\n", + "Generated JSONL file with - 160 max words, 125 samples - at ./dataset/gen-word-160-count.jsonl\n", + "Generated JSONL file with - 165 max words, 125 samples - at ./dataset/gen-word-165-count.jsonl\n", + "Generated JSONL file with - 175 max words, 125 samples - at ./dataset/gen-word-175-count.jsonl\n", + "Generated JSONL file with - 220 max words, 100 samples - at ./dataset/gen-word-220-count.jsonl\n", + "Generated JSONL file with - 30 max words, 500 samples - at ./dataset/gen-word-30-count.jsonl\n", + "Generated JSONL file with - 40 max words, 500 samples - at ./dataset/gen-word-40-count.jsonl\n", + "Generated JSONL file with - 110 max words, 125 samples - at ./dataset/gen-word-110-count.jsonl\n", + "Generated JSONL file with - 55 max words, 500 samples - at ./dataset/gen-word-55-count.jsonl\n", + "Generated JSONL file with - 245 max words, 100 samples - at ./dataset/gen-word-245-count.jsonl\n", + "Generated JSONL file with - 130 max words, 125 samples - at ./dataset/gen-word-130-count.jsonl\n", + "Generated JSONL file with - 115 max words, 125 samples - at ./dataset/gen-word-115-count.jsonl\n", + "Generated JSONL file with - 45 max words, 500 samples - at ./dataset/gen-word-45-count.jsonl\n", + "Generated JSONL file with - 285 max words, 100 samples - at ./dataset/gen-word-285-count.jsonl\n", + "Generated JSONL file with - 120 max words, 125 samples - at ./dataset/gen-word-120-count.jsonl\n", + "Generated JSONL file with - 205 max words, 100 samples - at ./dataset/gen-word-205-count.jsonl\n", + "Generated JSONL file with - 150 max words, 125 samples - at ./dataset/gen-word-150-count.jsonl\n", + "Generated JSONL file with - 170 max words, 125 samples - at ./dataset/gen-word-170-count.jsonl\n", + "Generated JSONL file with - 145 max words, 125 samples - at ./dataset/gen-word-145-count.jsonl\n", + "Generated JSONL file with - 70 max words, 500 samples - at ./dataset/gen-word-70-count.jsonl\n", + "Generated JSONL file with - 75 max words, 500 samples - at ./dataset/gen-word-75-count.jsonl\n", + "Generated JSONL file with - 135 max words, 
125 samples - at ./dataset/gen-word-135-count.jsonl\n", + "Generated JSONL file with - 295 max words, 100 samples - at ./dataset/gen-word-295-count.jsonl\n", + "Generated JSONL file with - 155 max words, 125 samples - at ./dataset/gen-word-155-count.jsonl\n", + "Generated JSONL file with - 60 max words, 500 samples - at ./dataset/gen-word-60-count.jsonl\n", + "Generated JSONL file with - 360 max words, 100 samples - at ./dataset/gen-word-360-count.jsonl\n", + "Generated JSONL file with - 185 max words, 125 samples - at ./dataset/gen-word-185-count.jsonl\n", + "Generated JSONL file with - 85 max words, 500 samples - at ./dataset/gen-word-85-count.jsonl\n", + "Generated JSONL file with - 210 max words, 100 samples - at ./dataset/gen-word-210-count.jsonl\n", + "Generated JSONL file with - 190 max words, 125 samples - at ./dataset/gen-word-190-count.jsonl\n", + "Generated JSONL file with - 215 max words, 100 samples - at ./dataset/gen-word-215-count.jsonl\n", + "Generated JSONL file with - 200 max words, 125 samples - at ./dataset/gen-word-200-count.jsonl\n", + "Generated JSONL file with - 90 max words, 500 samples - at ./dataset/gen-word-90-count.jsonl\n", + "Generated JSONL file with - 235 max words, 100 samples - at ./dataset/gen-word-235-count.jsonl\n", + "Generated JSONL file with - 290 max words, 100 samples - at ./dataset/gen-word-290-count.jsonl\n", + "Generated JSONL file with - 80 max words, 500 samples - at ./dataset/gen-word-80-count.jsonl\n", + "Generated JSONL file with - 95 max words, 500 samples - at ./dataset/gen-word-95-count.jsonl\n", + "Generated JSONL file with - 305 max words, 100 samples - at ./dataset/gen-word-305-count.jsonl\n", + "Generated JSONL file with - 265 max words, 100 samples - at ./dataset/gen-word-265-count.jsonl\n", + "Generated JSONL file with - 250 max words, 100 samples - at ./dataset/gen-word-250-count.jsonl\n", + "Generated JSONL file with - 355 max words, 100 samples - at ./dataset/gen-word-355-count.jsonl\n", + "Generated JSONL file with - 340 max words, 100 samples - at ./dataset/gen-word-340-count.jsonl\n", + "Generated JSONL file with - 195 max words, 125 samples - at ./dataset/gen-word-195-count.jsonl\n", + "Generated JSONL file with - 225 max words, 100 samples - at ./dataset/gen-word-225-count.jsonl\n", + "Generated JSONL file with - 270 max words, 100 samples - at ./dataset/gen-word-270-count.jsonl\n", + "Generated JSONL file with - 365 max words, 100 samples - at ./dataset/gen-word-365-count.jsonl\n", + "Generated JSONL file with - 230 max words, 100 samples - at ./dataset/gen-word-230-count.jsonl\n", + "Generated JSONL file with - 255 max words, 100 samples - at ./dataset/gen-word-255-count.jsonl\n", + "Generated JSONL file with - 260 max words, 100 samples - at ./dataset/gen-word-260-count.jsonl\n", + "Generated JSONL file with - 100 max words, 500 samples - at ./dataset/gen-word-100-count.jsonl\n", + "Generated JSONL file with - 300 max words, 100 samples - at ./dataset/gen-word-300-count.jsonl\n", + "Generated JSONL file with - 240 max words, 100 samples - at ./dataset/gen-word-240-count.jsonl\n", + "Generated JSONL file with - 275 max words, 100 samples - at ./dataset/gen-word-275-count.jsonl\n", + "Generated JSONL file with - 280 max words, 100 samples - at ./dataset/gen-word-280-count.jsonl\n", + "Generated JSONL file with - 310 max words, 100 samples - at ./dataset/gen-word-310-count.jsonl\n", + "Generated JSONL file with - 315 max words, 100 samples - at ./dataset/gen-word-315-count.jsonl\n", + "Generated JSONL file with - 350 max 
words, 100 samples - at ./dataset/gen-word-350-count.jsonl\n", + "Generated JSONL file with - 370 max words, 100 samples - at ./dataset/gen-word-370-count.jsonl\n", + "Generated JSONL file with - 420 max words, 100 samples - at ./dataset/gen-word-420-count.jsonl\n", + "Generated JSONL file with - 650 max words, 100 samples - at ./dataset/gen-word-650-count.jsonl\n", + "Generated JSONL file with - 320 max words, 100 samples - at ./dataset/gen-word-320-count.jsonl\n", + "Generated JSONL file with - 455 max words, 100 samples - at ./dataset/gen-word-455-count.jsonl\n", + "Generated JSONL file with - 390 max words, 100 samples - at ./dataset/gen-word-390-count.jsonl\n", + "Generated JSONL file with - 400 max words, 100 samples - at ./dataset/gen-word-400-count.jsonl\n", + "Generated JSONL file with - 540 max words, 100 samples - at ./dataset/gen-word-540-count.jsonl\n", + "Generated JSONL file with - 395 max words, 100 samples - at ./dataset/gen-word-395-count.jsonl\n", + "Generated JSONL file with - 700 max words, 100 samples - at ./dataset/gen-word-700-count.jsonl\n", + "Generated JSONL file with - 645 max words, 100 samples - at ./dataset/gen-word-645-count.jsonl\n", + "Generated JSONL file with - 375 max words, 100 samples - at ./dataset/gen-word-375-count.jsonl\n", + "Generated JSONL file with - 380 max words, 100 samples - at ./dataset/gen-word-380-count.jsonl\n", + "Generated JSONL file with - 655 max words, 100 samples - at ./dataset/gen-word-655-count.jsonl\n", + "Generated JSONL file with - 485 max words, 100 samples - at ./dataset/gen-word-485-count.jsonl\n", + "Generated JSONL file with - 660 max words, 100 samples - at ./dataset/gen-word-660-count.jsonl\n", + "Generated JSONL file with - 675 max words, 100 samples - at ./dataset/gen-word-675-count.jsonl\n", + "Generated JSONL file with - 690 max words, 100 samples - at ./dataset/gen-word-690-count.jsonl\n", + "Generated JSONL file with - 560 max words, 100 samples - at ./dataset/gen-word-560-count.jsonl\n", + "Generated JSONL file with - 760 max words, 100 samples - at ./dataset/gen-word-760-count.jsonl\n", + "Generated JSONL file with - 605 max words, 100 samples - at ./dataset/gen-word-605-count.jsonl\n", + "Generated JSONL file with - 430 max words, 100 samples - at ./dataset/gen-word-430-count.jsonl\n", + "Generated JSONL file with - 550 max words, 100 samples - at ./dataset/gen-word-550-count.jsonl\n", + "Generated JSONL file with - 815 max words, 100 samples - at ./dataset/gen-word-815-count.jsonl\n", + "Generated JSONL file with - 715 max words, 100 samples - at ./dataset/gen-word-715-count.jsonl\n", + "Generated JSONL file with - 495 max words, 100 samples - at ./dataset/gen-word-495-count.jsonl\n", + "Generated JSONL file with - 795 max words, 100 samples - at ./dataset/gen-word-795-count.jsonl\n", + "Generated JSONL file with - 465 max words, 100 samples - at ./dataset/gen-word-465-count.jsonl\n", + "Generated JSONL file with - 840 max words, 100 samples - at ./dataset/gen-word-840-count.jsonl\n", + "Generated JSONL file with - 330 max words, 100 samples - at ./dataset/gen-word-330-count.jsonl\n", + "Generated JSONL file with - 545 max words, 100 samples - at ./dataset/gen-word-545-count.jsonl\n", + "Generated JSONL file with - 335 max words, 100 samples - at ./dataset/gen-word-335-count.jsonl\n", + "Generated JSONL file with - 830 max words, 100 samples - at ./dataset/gen-word-830-count.jsonl\n", + "Generated JSONL file with - 525 max words, 100 samples - at ./dataset/gen-word-525-count.jsonl\n", + "Generated a single 
JSONL file with 1348 samples (100 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", + "Generated JSONL file with - 425 max words, 100 samples - at ./dataset/gen-word-425-count.jsonl\n", + "Generated a single JSONL file with 705 samples (100 token repeat) - 365 max words - at ./dataset/shuffle-word-365-count.jsonl\n", + "Generated JSONL file with - 765 max words, 100 samples - at ./dataset/gen-word-765-count.jsonl\n", + "Generated JSONL file with - 725 max words, 100 samples - at ./dataset/gen-word-725-count.jsonl\n", + "Generated JSONL file with - 325 max words, 100 samples - at ./dataset/gen-word-325-count.jsonl\n", + "Generated JSONL file with - 435 max words, 100 samples - at ./dataset/gen-word-435-count.jsonl\n", + "Generated JSONL file with - 640 max words, 100 samples - at ./dataset/gen-word-640-count.jsonl\n", + "Generated JSONL file with - 755 max words, 100 samples - at ./dataset/gen-word-755-count.jsonl\n", + "Generated JSONL file with - 475 max words, 100 samples - at ./dataset/gen-word-475-count.jsonl\n", + "Generated JSONL file with - 695 max words, 100 samples - at ./dataset/gen-word-695-count.jsonl\n", + "Generated JSONL file with - 535 max words, 100 samples - at ./dataset/gen-word-535-count.jsonl\n", + "Generated JSONL file with - 575 max words, 100 samples - at ./dataset/gen-word-575-count.jsonl\n", + "Generated JSONL file with - 590 max words, 100 samples - at ./dataset/gen-word-590-count.jsonl\n", + "Generated a single JSONL file with 703 samples (100 token repeat) - 355 max words - at ./dataset/shuffle-word-355-count.jsonl\n", + "Generated JSONL file with - 345 max words, 100 samples - at ./dataset/gen-word-345-count.jsonl\n", + "Generated a single JSONL file with 4426 samples (100 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", + "Generated JSONL file with - 490 max words, 100 samples - at ./dataset/gen-word-490-count.jsonl\n", + "Generated JSONL file with - 670 max words, 100 samples - at ./dataset/gen-word-670-count.jsonl\n", + "Generated a single JSONL file with 588 samples (100 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", + "Generated a single JSONL file with 401 samples (100 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", + "Generated JSONL file with - 800 max words, 100 samples - at ./dataset/gen-word-800-count.jsonl\n", + "Generated JSONL file with - 530 max words, 100 samples - at ./dataset/gen-word-530-count.jsonl\n", + "Generated JSONL file with - 925 max words, 100 samples - at ./dataset/gen-word-925-count.jsonl\n", + "Generated JSONL file with - 785 max words, 100 samples - at ./dataset/gen-word-785-count.jsonl\n", + "Generated JSONL file with - 385 max words, 100 samples - at ./dataset/gen-word-385-count.jsonl\n", + "Generated JSONL file with - 685 max words, 100 samples - at ./dataset/gen-word-685-count.jsonl\n", + "Generated JSONL file with - 500 max words, 100 samples - at ./dataset/gen-word-500-count.jsonl\n", + "Generated a single JSONL file with 705 samples (100 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", + "Generated JSONL file with - 585 max words, 100 samples - at ./dataset/gen-word-585-count.jsonl\n", + "Generated a single JSONL file with 405 samples (100 token repeat) - 685 max words - at ./dataset/shuffle-word-685-count.jsonl\n", + "Generated a single JSONL file with 3125 samples (100 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", + "Generated JSONL file with - 845 
max words, 100 samples - at ./dataset/gen-word-845-count.jsonl\n", + "Generated a single JSONL file with 1370 samples (100 token repeat) - 185 max words - at ./dataset/shuffle-word-185-count.jsonl\n", + "Generated JSONL file with - 860 max words, 100 samples - at ./dataset/gen-word-860-count.jsonl\n", + "Generated JSONL file with - 625 max words, 100 samples - at ./dataset/gen-word-625-count.jsonl\n", + "Generated a single JSONL file with 3761 samples (100 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", + "Generated JSONL file with - 750 max words, 100 samples - at ./dataset/gen-word-750-count.jsonl\n", + "Generated a single JSONL file with 3542 samples (100 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", + "Generated a single JSONL file with 405 samples (100 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", + "Generated JSONL file with - 450 max words, 100 samples - at ./dataset/gen-word-450-count.jsonl\n", + "Generated a single JSONL file with 499 samples (100 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", + "Generated a single JSONL file with 315 samples (100 token repeat) - 830 max words - at ./dataset/shuffle-word-830-count.jsonl\n", + "Generated JSONL file with - 665 max words, 100 samples - at ./dataset/gen-word-665-count.jsonl\n", + "Generated a single JSONL file with 706 samples (100 token repeat) - 345 max words - at ./dataset/shuffle-word-345-count.jsonl\n", + "Generated JSONL file with - 710 max words, 100 samples - at ./dataset/gen-word-710-count.jsonl\n", + "Generated JSONL file with - 600 max words, 100 samples - at ./dataset/gen-word-600-count.jsonl\n", + "Generated JSONL file with - 880 max words, 100 samples - at ./dataset/gen-word-880-count.jsonl\n", + "Generated JSONL file with - 680 max words, 100 samples - at ./dataset/gen-word-680-count.jsonl\n", + "Generated JSONL file with - 730 max words, 100 samples - at ./dataset/gen-word-730-count.jsonl\n", + "Generated a single JSONL file with 398 samples (100 token repeat) - 755 max words - at ./dataset/shuffle-word-755-count.jsonl\n", + "Generated JSONL file with - 580 max words, 100 samples - at ./dataset/gen-word-580-count.jsonl\n", + "Generated JSONL file with - 805 max words, 100 samples - at ./dataset/gen-word-805-count.jsonl\n", + "Generated JSONL file with - 735 max words, 100 samples - at ./dataset/gen-word-735-count.jsonl\n", + "Generated a single JSONL file with 399 samples (100 token repeat) - 785 max words - at ./dataset/shuffle-word-785-count.jsonl\n", + "Generated JSONL file with - 775 max words, 100 samples - at ./dataset/gen-word-775-count.jsonl\n", + "Generated JSONL file with - 705 max words, 100 samples - at ./dataset/gen-word-705-count.jsonl\n", + "Generated JSONL file with - 610 max words, 100 samples - at ./dataset/gen-word-610-count.jsonl\n", + "Generated JSONL file with - 770 max words, 100 samples - at ./dataset/gen-word-770-count.jsonl\n", + "Generated a single JSONL file with 4071 samples (100 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonl\n", + "Generated JSONL file with - 780 max words, 100 samples - at ./dataset/gen-word-780-count.jsonl\n", + "Generated JSONL file with - 825 max words, 100 samples - at ./dataset/gen-word-825-count.jsonl\n", + "Generated JSONL file with - 885 max words, 100 samples - at ./dataset/gen-word-885-count.jsonl\n", + "Generated JSONL file with - 740 max words, 100 samples - at ./dataset/gen-word-740-count.jsonl\n", + "Generated 
JSONL file with - 915 max words, 100 samples - at ./dataset/gen-word-915-count.jsonl\n", + "Generated JSONL file with - 855 max words, 100 samples - at ./dataset/gen-word-855-count.jsonl\n", + "Generated a single JSONL file with 908 samples (100 token repeat) - 295 max words - at ./dataset/shuffle-word-295-count.jsonl\n", + "Generated JSONL file with - 720 max words, 100 samples - at ./dataset/gen-word-720-count.jsonl\n", + "Generated JSONL file with - 790 max words, 100 samples - at ./dataset/gen-word-790-count.jsonl\n", + "Generated a single JSONL file with 403 samples (100 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", + "Generated a single JSONL file with 1362 samples (100 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", + "Generated JSONL file with - 920 max words, 100 samples - at ./dataset/gen-word-920-count.jsonl\n", + "Generated JSONL file with - 895 max words, 100 samples - at ./dataset/gen-word-895-count.jsonl\n", + "Generated a single JSONL file with 400 samples (100 token repeat) - 800 max words - at ./dataset/shuffle-word-800-count.jsonl\n", + "Generated a single JSONL file with 314 samples (100 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", + "Generated JSONL file with - 460 max words, 100 samples - at ./dataset/gen-word-460-count.jsonl\n", + "Generated a single JSONL file with 581 samples (100 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", + "Generated a single JSONL file with 915 samples (100 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", + "Generated a single JSONL file with 1910 samples (100 token repeat) - 115 max words - at ./dataset/shuffle-word-115-count.jsonl\n", + "Generated JSONL file with - 975 max words, 100 samples - at ./dataset/gen-word-975-count.jsonl\n", + "Generated JSONL file with - 835 max words, 100 samples - at ./dataset/gen-word-835-count.jsonl\n", + "Generated a single JSONL file with 316 samples (100 token repeat) - 865 max words - at ./dataset/shuffle-word-865-count.jsonl\n", + "Generated JSONL file with - 810 max words, 100 samples - at ./dataset/gen-word-810-count.jsonl\n", + "Generated a single JSONL file with 402 samples (100 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", + "Generated JSONL file with - 630 max words, 100 samples - at ./dataset/gen-word-630-count.jsonl\n", + "Generated a single JSONL file with 322 samples (100 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", + "Generated JSONL file with - 445 max words, 100 samples - at ./dataset/gen-word-445-count.jsonl\n", + "Generated JSONL file with - 850 max words, 100 samples - at ./dataset/gen-word-850-count.jsonl\n", + "Generated a single JSONL file with 711 samples (100 token repeat) - 315 max words - at ./dataset/shuffle-word-315-count.jsonl\n", + "Generated a single JSONL file with 703 samples (100 token repeat) - 385 max words - at ./dataset/shuffle-word-385-count.jsonl\n", + "Generated a single JSONL file with 2807 samples (100 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n", + "Generated JSONL file with - 875 max words, 100 samples - at ./dataset/gen-word-875-count.jsonl\n", + "Generated a single JSONL file with 1725 samples (100 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", + 
"Generated a single JSONL file with 300 samples (100 token repeat) - 925 max words - at ./dataset/shuffle-word-925-count.jsonl\n", + "Generated JSONL file with - 865 max words, 100 samples - at ./dataset/gen-word-865-count.jsonl\n", + "Generated JSONL file with - 470 max words, 100 samples - at ./dataset/gen-word-470-count.jsonl\n", + "Generated JSONL file with - 905 max words, 100 samples - at ./dataset/gen-word-905-count.jsonl\n", + "Generated a single JSONL file with 399 samples (100 token repeat) - 725 max words - at ./dataset/shuffle-word-725-count.jsonl\n", + "Generated a single JSONL file with 403 samples (100 token repeat) - 665 max words - at ./dataset/shuffle-word-665-count.jsonl\n", + "Generated JSONL file with - 820 max words, 100 samples - at ./dataset/gen-word-820-count.jsonl\n", + "Generated a single JSONL file with 249 samples (100 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", + "Generated a single JSONL file with 314 samples (100 token repeat) - 815 max words - at ./dataset/shuffle-word-815-count.jsonl\n", + "Generated JSONL file with - 1170 max words, 100 samples - at ./dataset/gen-word-1170-count.jsonl\n", + "Generated JSONL file with - 410 max words, 100 samples - at ./dataset/gen-word-410-count.jsonl\n", + "Generated JSONL file with - 930 max words, 100 samples - at ./dataset/gen-word-930-count.jsonl\n", + "Generated JSONL file with - 440 max words, 100 samples - at ./dataset/gen-word-440-count.jsonl\n", + "Generated JSONL file with - 1130 max words, 100 samples - at ./dataset/gen-word-1130-count.jsonl\n", + "Generated JSONL file with - 900 max words, 100 samples - at ./dataset/gen-word-900-count.jsonl\n", + "Generated a single JSONL file with 404 samples (100 token repeat) - 655 max words - at ./dataset/shuffle-word-655-count.jsonl\n", + "Generated a single JSONL file with 398 samples (100 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", + "Generated JSONL file with - 480 max words, 100 samples - at ./dataset/gen-word-480-count.jsonl\n", + "Generated JSONL file with - 870 max words, 100 samples - at ./dataset/gen-word-870-count.jsonl\n", + "Generated JSONL file with - 910 max words, 100 samples - at ./dataset/gen-word-910-count.jsonl\n", + "Generated a single JSONL file with 316 samples (100 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", + "Generated JSONL file with - 510 max words, 100 samples - at ./dataset/gen-word-510-count.jsonl\n", + "Generated JSONL file with - 1015 max words, 100 samples - at ./dataset/gen-word-1015-count.jsonl\n", + "Generated JSONL file with - 1155 max words, 100 samples - at ./dataset/gen-word-1155-count.jsonl\n", + "Generated JSONL file with - 405 max words, 100 samples - at ./dataset/gen-word-405-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", + "Generated JSONL file with - 520 max words, 100 samples - at ./dataset/gen-word-520-count.jsonl\n", + "Generated a single JSONL file with 704 samples (100 token repeat) - 335 max words - at ./dataset/shuffle-word-335-count.jsonl\n", + "Generated JSONL file with - 1150 max words, 100 samples - at ./dataset/gen-word-1150-count.jsonl\n", + "Generated JSONL file with - 415 max words, 100 samples - at ./dataset/gen-word-415-count.jsonl\n", + "Generated JSONL file with - 515 max words, 100 samples - at ./dataset/gen-word-515-count.jsonl\n", + "Generated JSONL file with - 890 max words, 100 samples - at 
./dataset/gen-word-890-count.jsonl\n", + "Generated a single JSONL file with 400 samples (100 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", + "Generated JSONL file with - 1350 max words, 100 samples - at ./dataset/gen-word-1350-count.jsonl\n", + "Generated a single JSONL file with 915 samples (100 token repeat) - 265 max words - at ./dataset/shuffle-word-265-count.jsonl\n", + "Generated a single JSONL file with 702 samples (100 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", + "Generated a single JSONL file with 917 samples (100 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", + "Generated a single JSONL file with 5246 samples (100 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", + "Generated a single JSONL file with 724 samples (100 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", + "Generated JSONL file with - 1260 max words, 100 samples - at ./dataset/gen-word-1260-count.jsonl\n", + "Generated a single JSONL file with 318 samples (100 token repeat) - 885 max words - at ./dataset/shuffle-word-885-count.jsonl\n", + "Generated a single JSONL file with 709 samples (100 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", + "Generated JSONL file with - 570 max words, 100 samples - at ./dataset/gen-word-570-count.jsonl\n", + "Generated a single JSONL file with 318 samples (100 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", + "Generated JSONL file with - 615 max words, 100 samples - at ./dataset/gen-word-615-count.jsonl\n", + "Generated a single JSONL file with 590 samples (100 token repeat) - 455 max words - at ./dataset/shuffle-word-455-count.jsonl\n", + "Generated a single JSONL file with 732 samples (100 token repeat) - 305 max words - at ./dataset/shuffle-word-305-count.jsonl\n", + "Generated a single JSONL file with 576 samples (100 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", + "Generated a single JSONL file with 403 samples (100 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", + "Generated JSONL file with - 1095 max words, 100 samples - at ./dataset/gen-word-1095-count.jsonl\n", + "Generated a single JSONL file with 4840 samples (100 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", + "Generated JSONL file with - 565 max words, 100 samples - at ./dataset/gen-word-565-count.jsonl\n", + "Generated a single JSONL file with 398 samples (100 token repeat) - 795 max words - at ./dataset/shuffle-word-795-count.jsonl\n", + "Generated JSONL file with - 505 max words, 100 samples - at ./dataset/gen-word-505-count.jsonl\n", + "Generated a single JSONL file with 204 samples (100 token repeat) - 1345 max words - at ./dataset/shuffle-word-1345-count.jsonl\n", + "Generated JSONL file with - 1115 max words, 100 samples - at ./dataset/gen-word-1115-count.jsonl\n", + "Generated JSONL file with - 620 max words, 100 samples - at ./dataset/gen-word-620-count.jsonl\n", + "Generated JSONL file with - 1000 max words, 100 samples - at ./dataset/gen-word-1000-count.jsonl\n", + "Generated a single JSONL file with 320 samples (100 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", + "Generated a single JSONL file with 201 samples (100 token repeat) - 1355 max words - at ./dataset/shuffle-word-1355-count.jsonl\n", + "Generated a single JSONL file with 398 samples (100 token repeat) - 720 max words - at 
./dataset/shuffle-word-720-count.jsonl\n", + "Generated JSONL file with - 1025 max words, 100 samples - at ./dataset/gen-word-1025-count.jsonl\n", + "Generated a single JSONL file with 1022 samples (100 token repeat) - 215 max words - at ./dataset/shuffle-word-215-count.jsonl\n", + "Generated JSONL file with - 1420 max words, 100 samples - at ./dataset/gen-word-1420-count.jsonl\n", + "Generated JSONL file with - 1345 max words, 100 samples - at ./dataset/gen-word-1345-count.jsonl\n", + "Generated JSONL file with - 1490 max words, 100 samples - at ./dataset/gen-word-1490-count.jsonl\n", + "Generated a single JSONL file with 401 samples (100 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", + "Generated a single JSONL file with 400 samples (100 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", + "Generated a single JSONL file with 298 samples (100 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 935 max words - at ./dataset/shuffle-word-935-count.jsonl\n", + "Generated a single JSONL file with 405 samples (100 token repeat) - 695 max words - at ./dataset/shuffle-word-695-count.jsonl\n", + "Generated a single JSONL file with 499 samples (100 token repeat) - 600 max words - at ./dataset/shuffle-word-600-count.jsonl\n", + "Generated a single JSONL file with 320 samples (100 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", + "Generated a single JSONL file with 399 samples (100 token repeat) - 745 max words - at ./dataset/shuffle-word-745-count.jsonl\n", + "Generated JSONL file with - 1135 max words, 100 samples - at ./dataset/gen-word-1135-count.jsonl\n", + "Generated a single JSONL file with 498 samples (100 token repeat) - 575 max words - at ./dataset/shuffle-word-575-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", + "Generated JSONL file with - 1105 max words, 100 samples - at ./dataset/gen-word-1105-count.jsonl\n", + "Generated a single JSONL file with 499 samples (100 token repeat) - 545 max words - at ./dataset/shuffle-word-545-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", + "Generated a single JSONL file with 400 samples (100 token repeat) - 675 max words - at ./dataset/shuffle-word-675-count.jsonl\n", + "Generated a single JSONL file with 316 samples (100 token repeat) - 845 max words - at ./dataset/shuffle-word-845-count.jsonl\n", + "Generated a single JSONL file with 590 samples (100 token repeat) - 435 max words - at ./dataset/shuffle-word-435-count.jsonl\n", + "Generated a single JSONL file with 252 samples (100 token repeat) - 1245 max words - at ./dataset/shuffle-word-1245-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", + "Generated a single JSONL file with 5858 samples (100 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", + "Generated a single JSONL file with 710 samples (100 token repeat) - 325 max words - at ./dataset/shuffle-word-325-count.jsonl\n", + "Generated JSONL file with - 635 max words, 100 samples - at ./dataset/gen-word-635-count.jsonl\n", + "Generated a single JSONL file with 404 samples (100 token repeat) - 650 max words - at 
./dataset/shuffle-word-650-count.jsonl\n", + "Generated JSONL file with - 1020 max words, 100 samples - at ./dataset/gen-word-1020-count.jsonl\n", + "Generated a single JSONL file with 497 samples (100 token repeat) - 565 max words - at ./dataset/shuffle-word-565-count.jsonl\n", + "Generated a single JSONL file with 592 samples (100 token repeat) - 425 max words - at ./dataset/shuffle-word-425-count.jsonl\n", + "Generated a single JSONL file with 3318 samples (100 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", + "Generated a single JSONL file with 701 samples (100 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", + "Generated a single JSONL file with 592 samples (100 token repeat) - 465 max words - at ./dataset/shuffle-word-465-count.jsonl\n", + "Generated a single JSONL file with 314 samples (100 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", + "Generated a single JSONL file with 324 samples (100 token repeat) - 855 max words - at ./dataset/shuffle-word-855-count.jsonl\n", + "Generated a single JSONL file with 399 samples (100 token repeat) - 775 max words - at ./dataset/shuffle-word-775-count.jsonl\n", + "Generated JSONL file with - 1235 max words, 100 samples - at ./dataset/gen-word-1235-count.jsonl\n", + "Generated a single JSONL file with 317 samples (100 token repeat) - 805 max words - at ./dataset/shuffle-word-805-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 905 max words - at ./dataset/shuffle-word-905-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1455 max words - at ./dataset/shuffle-word-1455-count.jsonl\n", + "Generated JSONL file with - 950 max words, 100 samples - at ./dataset/gen-word-950-count.jsonl\n", + "Generated JSONL file with - 1460 max words, 100 samples - at ./dataset/gen-word-1460-count.jsonl\n", + "Generated a single JSONL file with 403 samples (100 token repeat) - 645 max words - at ./dataset/shuffle-word-645-count.jsonl\n", + "Generated a single JSONL file with 404 samples (100 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", + "Generated a single JSONL file with 252 samples (100 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", + "Generated a single JSONL file with 398 samples (100 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", + "Generated a single JSONL file with 206 samples (100 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", + "Generated a single JSONL file with 254 samples (100 token repeat) - 1255 max words - at ./dataset/shuffle-word-1255-count.jsonl\n", + "Generated JSONL file with - 555 max words, 100 samples - at ./dataset/gen-word-555-count.jsonl\n", + "Generated a single JSONL file with 702 samples (100 token repeat) - 390 max words - at ./dataset/shuffle-word-390-count.jsonl\n", + "Generated JSONL file with - 1230 max words, 100 samples - at ./dataset/gen-word-1230-count.jsonl\n", + "Generated JSONL file with - 1050 max words, 100 samples - at ./dataset/gen-word-1050-count.jsonl\n", + "Generated JSONL file with - 1330 max words, 100 samples - at ./dataset/gen-word-1330-count.jsonl\n", + "Generated a single JSONL file with 400 samples (100 token repeat) - 715 max words - at ./dataset/shuffle-word-715-count.jsonl\n", + "Generated a single JSONL file with 398 samples (100 token repeat) - 765 max words - at ./dataset/shuffle-word-765-count.jsonl\n", + "Generated a 
single JSONL file with 300 samples (100 token repeat) - 985 max words - at ./dataset/shuffle-word-985-count.jsonl\n", + "Generated JSONL file with - 1195 max words, 100 samples - at ./dataset/gen-word-1195-count.jsonl\n", + "Generated a single JSONL file with 399 samples (100 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", + "Generated a single JSONL file with 319 samples (100 token repeat) - 825 max words - at ./dataset/shuffle-word-825-count.jsonl\n", + "Generated JSONL file with - 980 max words, 100 samples - at ./dataset/gen-word-980-count.jsonl\n", + "Generated JSONL file with - 955 max words, 100 samples - at ./dataset/gen-word-955-count.jsonl\n", + "Generated a single JSONL file with 1683 samples (100 token repeat) - 135 max words - at ./dataset/shuffle-word-135-count.jsonl\n", + "Generated JSONL file with - 1335 max words, 100 samples - at ./dataset/gen-word-1335-count.jsonl\n", + "Generated JSONL file with - 1455 max words, 100 samples - at ./dataset/gen-word-1455-count.jsonl\n", + "Generated JSONL file with - 1055 max words, 100 samples - at ./dataset/gen-word-1055-count.jsonl\n", + "Generated JSONL file with - 1445 max words, 100 samples - at ./dataset/gen-word-1445-count.jsonl\n", + "Generated JSONL file with - 1465 max words, 100 samples - at ./dataset/gen-word-1465-count.jsonl\n", + "Generated a single JSONL file with 399 samples (100 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", + "Generated JSONL file with - 1205 max words, 100 samples - at ./dataset/gen-word-1205-count.jsonl\n", + "Generated a single JSONL file with 296 samples (100 token repeat) - 1115 max words - at ./dataset/shuffle-word-1115-count.jsonl\n", + "Generated JSONL file with - 1290 max words, 100 samples - at ./dataset/gen-word-1290-count.jsonl\n", + "Generated a single JSONL file with 318 samples (100 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", + "Generated a single JSONL file with 1537 samples (100 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", + "Generated JSONL file with - 935 max words, 100 samples - at ./dataset/gen-word-935-count.jsonl\n", + "Generated a single JSONL file with 595 samples (100 token repeat) - 415 max words - at ./dataset/shuffle-word-415-count.jsonl\n", + "Generated JSONL file with - 595 max words, 100 samples - at ./dataset/gen-word-595-count.jsonl\n", + "Generated a single JSONL file with 324 samples (100 token repeat) - 895 max words - at ./dataset/shuffle-word-895-count.jsonl\n", + "Generated a single JSONL file with 586 samples (100 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", + "Generated a single JSONL file with 313 samples (100 token repeat) - 835 max words - at ./dataset/shuffle-word-835-count.jsonl\n", + "Generated JSONL file with - 940 max words, 100 samples - at ./dataset/gen-word-940-count.jsonl\n", + "Generated JSONL file with - 1180 max words, 100 samples - at ./dataset/gen-word-1180-count.jsonl\n", + "Generated JSONL file with - 1370 max words, 100 samples - at ./dataset/gen-word-1370-count.jsonl\n", + "Generated JSONL file with - 1450 max words, 100 samples - at ./dataset/gen-word-1450-count.jsonl\n", + "Generated JSONL file with - 985 max words, 100 samples - at ./dataset/gen-word-985-count.jsonl\n", + "Generated JSONL file with - 1390 max words, 100 samples - at ./dataset/gen-word-1390-count.jsonl\n", + "Generated JSONL file with - 960 max words, 100 samples - at ./dataset/gen-word-960-count.jsonl\n", + 
"Generated a single JSONL file with 1468 samples (100 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", + "Generated a single JSONL file with 312 samples (100 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", + "Generated JSONL file with - 1495 max words, 100 samples - at ./dataset/gen-word-1495-count.jsonl\n", + "Generated a single JSONL file with 398 samples (100 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonl\n", + "Generated a single JSONL file with 1001 samples (100 token repeat) - 225 max words - at ./dataset/shuffle-word-225-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 915 max words - at ./dataset/shuffle-word-915-count.jsonl\n", + "Generated a single JSONL file with 316 samples (100 token repeat) - 875 max words - at ./dataset/shuffle-word-875-count.jsonl\n", + "Generated JSONL file with - 1360 max words, 100 samples - at ./dataset/gen-word-1360-count.jsonl\n", + "Generated a single JSONL file with 586 samples (100 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", + "Generated a single JSONL file with 1394 samples (100 token repeat) - 175 max words - at ./dataset/shuffle-word-175-count.jsonl\n", + "Generated JSONL file with - 1085 max words, 100 samples - at ./dataset/gen-word-1085-count.jsonl\n", + "Generated a single JSONL file with 203 samples (100 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", + "Generated JSONL file with - 1010 max words, 100 samples - at ./dataset/gen-word-1010-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1475 max words - at ./dataset/shuffle-word-1475-count.jsonl\n", + "Generated JSONL file with - 1080 max words, 100 samples - at ./dataset/gen-word-1080-count.jsonl\n", + "Generated a single JSONL file with 399 samples (100 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", + "Generated JSONL file with - 1500 max words, 100 samples - at ./dataset/gen-word-1500-count.jsonl\n", + "Generated JSONL file with - 745 max words, 100 samples - at ./dataset/gen-word-745-count.jsonl\n", + "Generated a single JSONL file with 204 samples (100 token repeat) - 1315 max words - at ./dataset/shuffle-word-1315-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1025 max words - at ./dataset/shuffle-word-1025-count.jsonl\n", + "Generated a single JSONL file with 204 samples (100 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", + "Generated a single JSONL file with 397 samples (100 token repeat) - 705 max words - at ./dataset/shuffle-word-705-count.jsonl\n", + "Generated a single JSONL file with 247 samples (100 token repeat) - 1275 max words - at ./dataset/shuffle-word-1275-count.jsonl\n", + "Generated a single JSONL file with 499 samples (100 token repeat) - 525 max words - at ./dataset/shuffle-word-525-count.jsonl\n", + "Generated a single JSONL file with 299 samples (100 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", + "Generated a single JSONL file with 298 samples (100 token repeat) - 1175 max words - at ./dataset/shuffle-word-1175-count.jsonl\n", + "Generated JSONL file with - 1470 max words, 100 samples - at ./dataset/gen-word-1470-count.jsonl\n", + "Generated a single JSONL file with 248 
samples (100 token repeat) - 1235 max words - at ./dataset/shuffle-word-1235-count.jsonl\n", + "Generated a single JSONL file with 699 samples (100 token repeat) - 395 max words - at ./dataset/shuffle-word-395-count.jsonl\n", + "Generated a single JSONL file with 500 samples (100 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", + "Generated JSONL file with - 1065 max words, 100 samples - at ./dataset/gen-word-1065-count.jsonl\n", + "Generated JSONL file with - 1090 max words, 100 samples - at ./dataset/gen-word-1090-count.jsonl\n", + "Generated a single JSONL file with 1382 samples (100 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", + "Generated a single JSONL file with 597 samples (100 token repeat) - 405 max words - at ./dataset/shuffle-word-405-count.jsonl\n", + "Generated JSONL file with - 1240 max words, 100 samples - at ./dataset/gen-word-1240-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n", + "Generated JSONL file with - 1475 max words, 100 samples - at ./dataset/gen-word-1475-count.jsonl\n", + "Generated JSONL file with - 1365 max words, 100 samples - at ./dataset/gen-word-1365-count.jsonl\n", + "Generated JSONL file with - 995 max words, 100 samples - at ./dataset/gen-word-995-count.jsonl\n", + "Generated JSONL file with - 1175 max words, 100 samples - at ./dataset/gen-word-1175-count.jsonl\n", + "Generated JSONL file with - 1035 max words, 100 samples - at ./dataset/gen-word-1035-count.jsonl\n", + "Generated a single JSONL file with 402 samples (100 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", + "Generated a single JSONL file with 499 samples (100 token repeat) - 555 max words - at ./dataset/shuffle-word-555-count.jsonl\n", + "Generated JSONL file with - 1310 max words, 100 samples - at ./dataset/gen-word-1310-count.jsonl\n", + "Generated a single JSONL file with 401 samples (100 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", + "Generated JSONL file with - 1245 max words, 100 samples - at ./dataset/gen-word-1245-count.jsonl\n", + "Generated JSONL file with - 1125 max words, 100 samples - at ./dataset/gen-word-1125-count.jsonl\n", + "Generated a single JSONL file with 500 samples (100 token repeat) - 505 max words - at ./dataset/shuffle-word-505-count.jsonl\n", + "Generated JSONL file with - 1210 max words, 100 samples - at ./dataset/gen-word-1210-count.jsonl\n", + "Generated a single JSONL file with 2057 samples (100 token repeat) - 105 max words - at ./dataset/shuffle-word-105-count.jsonl\n", + "Generated JSONL file with - 1270 max words, 100 samples - at ./dataset/gen-word-1270-count.jsonl\n", + "Generated JSONL file with - 1225 max words, 100 samples - at ./dataset/gen-word-1225-count.jsonl\n", + "Generated JSONL file with - 945 max words, 100 samples - at ./dataset/gen-word-945-count.jsonl\n", + "Generated JSONL file with - 1400 max words, 100 samples - at ./dataset/gen-word-1400-count.jsonl\n", + "Generated JSONL file with - 1410 max words, 100 samples - at ./dataset/gen-word-1410-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", + "Generated JSONL file with - 1185 max words, 100 samples - at ./dataset/gen-word-1185-count.jsonl\n", + "Generated a single JSONL file with 498 samples (100 token repeat) - 550 max words - at 
./dataset/shuffle-word-550-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1015 max words - at ./dataset/shuffle-word-1015-count.jsonl\n", + "Generated JSONL file with - 1295 max words, 100 samples - at ./dataset/gen-word-1295-count.jsonl\n", + "Generated JSONL file with - 1375 max words, 100 samples - at ./dataset/gen-word-1375-count.jsonl\n", + "Generated a single JSONL file with 257 samples (100 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", + "Generated a single JSONL file with 1046 samples (100 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", + "Generated JSONL file with - 1425 max words, 100 samples - at ./dataset/gen-word-1425-count.jsonl\n", + "Generated JSONL file with - 1325 max words, 100 samples - at ./dataset/gen-word-1325-count.jsonl\n", + "Generated a single JSONL file with 254 samples (100 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", + "Generated a single JSONL file with 498 samples (100 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", + "Generated JSONL file with - 965 max words, 100 samples - at ./dataset/gen-word-965-count.jsonl\n", + "Generated JSONL file with - 1075 max words, 100 samples - at ./dataset/gen-word-1075-count.jsonl\n", + "Generated a single JSONL file with 1412 samples (100 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1055 max words - at ./dataset/shuffle-word-1055-count.jsonl\n", + "Generated JSONL file with - 1100 max words, 100 samples - at ./dataset/gen-word-1100-count.jsonl\n", + "Generated a single JSONL file with 249 samples (100 token repeat) - 1225 max words - at ./dataset/shuffle-word-1225-count.jsonl\n", + "Generated JSONL file with - 1415 max words, 100 samples - at ./dataset/gen-word-1415-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1185 max words - at ./dataset/shuffle-word-1185-count.jsonl\n", + "Generated JSONL file with - 1355 max words, 100 samples - at ./dataset/gen-word-1355-count.jsonl\n", + "Generated a single JSONL file with 17799 samples (100 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n", + "Generated a single JSONL file with 1845 samples (100 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", + "Generated a single JSONL file with 996 samples (100 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", + "Generated JSONL file with - 1120 max words, 100 samples - at ./dataset/gen-word-1120-count.jsonl\n", + "Generated JSONL file with - 1060 max words, 100 samples - at ./dataset/gen-word-1060-count.jsonl\n", + "Generated a single JSONL file with 584 samples (100 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", + "Generated a single JSONL file with 919 samples (100 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", + "Generated JSONL file with - 1480 max words, 100 samples - at ./dataset/gen-word-1480-count.jsonl\n", + "Generated JSONL file with - 1220 max words, 100 samples - at ./dataset/gen-word-1220-count.jsonl\n", + "Generated 
JSONL file with - 1320 max words, 100 samples - at ./dataset/gen-word-1320-count.jsonl\n", + "Generated JSONL file with - 1045 max words, 100 samples - at ./dataset/gen-word-1045-count.jsonl\n", + "Generated a single JSONL file with 202 samples (100 token repeat) - 1335 max words - at ./dataset/shuffle-word-1335-count.jsonl\n", + "Generated JSONL file with - 1280 max words, 100 samples - at ./dataset/gen-word-1280-count.jsonl\n", + "Generated JSONL file with - 1405 max words, 100 samples - at ./dataset/gen-word-1405-count.jsonl\n", + "Generated JSONL file with - 1160 max words, 100 samples - at ./dataset/gen-word-1160-count.jsonl\n", + "Generated a single JSONL file with 398 samples (100 token repeat) - 735 max words - at ./dataset/shuffle-word-735-count.jsonl\n", + "Generated a single JSONL file with 499 samples (100 token repeat) - 585 max words - at ./dataset/shuffle-word-585-count.jsonl\n", + "Generated JSONL file with - 1200 max words, 100 samples - at ./dataset/gen-word-1200-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 955 max words - at ./dataset/shuffle-word-955-count.jsonl\n", + "Generated a single JSONL file with 913 samples (100 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", + "Generated a single JSONL file with 939 samples (100 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", + "Generated JSONL file with - 1275 max words, 100 samples - at ./dataset/gen-word-1275-count.jsonl\n", + "Generated JSONL file with - 1030 max words, 100 samples - at ./dataset/gen-word-1030-count.jsonl\n", + "Generated a single JSONL file with 699 samples (100 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", + "Generated JSONL file with - 1040 max words, 100 samples - at ./dataset/gen-word-1040-count.jsonl\n", + "Generated a single JSONL file with 704 samples (100 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", + "Generated JSONL file with - 970 max words, 100 samples - at ./dataset/gen-word-970-count.jsonl\n", + "Generated a single JSONL file with 2932 samples (100 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 945 max words - at ./dataset/shuffle-word-945-count.jsonl\n", + "Generated JSONL file with - 1190 max words, 100 samples - at ./dataset/gen-word-1190-count.jsonl\n", + "Generated a single JSONL file with 2659 samples (100 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", + "Generated a single JSONL file with 258 samples (100 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1435 max words - at ./dataset/shuffle-word-1435-count.jsonl\n", + "Generated a single JSONL file with 1791 samples (100 token repeat) - 125 max words - at ./dataset/shuffle-word-125-count.jsonl\n", + "Generated a single JSONL file with 931 samples (100 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", + "Generated a single JSONL file with 915 samples (100 token repeat) - 285 max words - at ./dataset/shuffle-word-285-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", + "Generated JSONL 
file with - 1385 max words, 100 samples - at ./dataset/gen-word-1385-count.jsonl\n", + "Generated JSONL file with - 1110 max words, 100 samples - at ./dataset/gen-word-1110-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1035 max words - at ./dataset/shuffle-word-1035-count.jsonl\n", + "Generated a single JSONL file with 593 samples (100 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", + "Generated a single JSONL file with 250 samples (100 token repeat) - 1265 max words - at ./dataset/shuffle-word-1265-count.jsonl\n", + "Generated a single JSONL file with 990 samples (100 token repeat) - 235 max words - at ./dataset/shuffle-word-235-count.jsonl\n", + "Generated a single JSONL file with 239 samples (100 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", + "Generated a single JSONL file with 298 samples (100 token repeat) - 1125 max words - at ./dataset/shuffle-word-1125-count.jsonl\n", + "Generated a single JSONL file with 203 samples (100 token repeat) - 1375 max words - at ./dataset/shuffle-word-1375-count.jsonl\n", + "Generated JSONL file with - 1250 max words, 100 samples - at ./dataset/gen-word-1250-count.jsonl\n", + "Generated JSONL file with - 1430 max words, 100 samples - at ./dataset/gen-word-1430-count.jsonl\n", + "Generated a single JSONL file with 497 samples (100 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", + "Generated a single JSONL file with 500 samples (100 token repeat) - 515 max words - at ./dataset/shuffle-word-515-count.jsonl\n", + "Generated JSONL file with - 1140 max words, 100 samples - at ./dataset/gen-word-1140-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", + "Generated a single JSONL file with 298 samples (100 token repeat) - 1195 max words - at ./dataset/shuffle-word-1195-count.jsonl\n", + "Generated JSONL file with - 1145 max words, 100 samples - at ./dataset/gen-word-1145-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", + "Generated a single JSONL file with 203 samples (100 token repeat) - 1365 max words - at ./dataset/shuffle-word-1365-count.jsonl\n", + "Generated a single JSONL file with 500 samples (100 token repeat) - 595 max words - at ./dataset/shuffle-word-595-count.jsonl\n", + "Generated JSONL file with - 990 max words, 100 samples - at ./dataset/gen-word-990-count.jsonl\n", + "Generated JSONL file with - 1340 max words, 100 samples - at ./dataset/gen-word-1340-count.jsonl\n", + "Generated JSONL file with - 1165 max words, 100 samples - at ./dataset/gen-word-1165-count.jsonl\n", + "Generated JSONL file with - 1315 max words, 100 samples - at ./dataset/gen-word-1315-count.jsonl\n", + "Generated a single JSONL file with 1582 samples (100 token repeat) - 145 max words - at ./dataset/shuffle-word-145-count.jsonl\n", + "Generated a single JSONL file with 202 samples (100 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", + "Generated JSONL file with - 1435 max words, 100 samples - at ./dataset/gen-word-1435-count.jsonl\n", + "Generated a single JSONL file with 969 samples (100 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", + "Generated a single JSONL file with 299 samples (100 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", + "Generated 
JSONL file with - 1005 max words, 100 samples - at ./dataset/gen-word-1005-count.jsonl\n", + "Generated a single JSONL file with 298 samples (100 token repeat) - 1190 max words - at ./dataset/shuffle-word-1190-count.jsonl\n", + "Generated a single JSONL file with 203 samples (100 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1065 max words - at ./dataset/shuffle-word-1065-count.jsonl\n", + "Generated a single JSONL file with 258 samples (100 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", + "Generated a single JSONL file with 1992 samples (100 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", + "Generated JSONL file with - 1485 max words, 100 samples - at ./dataset/gen-word-1485-count.jsonl\n", + "Generated JSONL file with - 1380 max words, 100 samples - at ./dataset/gen-word-1380-count.jsonl\n", + "Generated a single JSONL file with 7531 samples (100 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", + "Generated a single JSONL file with 299 samples (100 token repeat) - 1155 max words - at ./dataset/shuffle-word-1155-count.jsonl\n", + "Generated a single JSONL file with 917 samples (100 token repeat) - 275 max words - at ./dataset/shuffle-word-275-count.jsonl\n", + "Generated a single JSONL file with 583 samples (100 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", + "Generated a single JSONL file with 403 samples (100 token repeat) - 615 max words - at ./dataset/shuffle-word-615-count.jsonl\n", + "Generated a single JSONL file with 1353 samples (100 token repeat) - 195 max words - at ./dataset/shuffle-word-195-count.jsonl\n", + "Generated JSONL file with - 1070 max words, 100 samples - at ./dataset/gen-word-1070-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", + "Generated a single JSONL file with 13090 samples (100 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1005 max words - at ./dataset/shuffle-word-1005-count.jsonl\n", + "Generated JSONL file with - 1215 max words, 100 samples - at ./dataset/gen-word-1215-count.jsonl\n", + "Generated JSONL file with - 1285 max words, 100 samples - at ./dataset/gen-word-1285-count.jsonl\n", + "Generated a single JSONL file with 1498 samples (100 token repeat) - 155 max words - at ./dataset/shuffle-word-155-count.jsonl\n", + "Generated a single JSONL file with 954 samples (100 token repeat) - 245 max words - at ./dataset/shuffle-word-245-count.jsonl\n", + "Generated a single JSONL file with 257 samples (100 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1405 max words - at ./dataset/shuffle-word-1405-count.jsonl\n", + "Generated a single JSONL file with 599 samples (100 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", + "Generated a single JSONL file with 499 samples (100 token repeat) - 535 max words - at ./dataset/shuffle-word-535-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1465 max words - at 
./dataset/shuffle-word-1465-count.jsonl\n", + "Generated a single JSONL file with 258 samples (100 token repeat) - 1295 max words - at ./dataset/shuffle-word-1295-count.jsonl\n", + "Generated a single JSONL file with 203 samples (100 token repeat) - 1305 max words - at ./dataset/shuffle-word-1305-count.jsonl\n", + "Generated a single JSONL file with 1006 samples (100 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", + "Generated a single JSONL file with 297 samples (100 token repeat) - 1135 max words - at ./dataset/shuffle-word-1135-count.jsonl\n", + "Generated a single JSONL file with 1083 samples (100 token repeat) - 205 max words - at ./dataset/shuffle-word-205-count.jsonl\n", + "Generated a single JSONL file with 8728 samples (100 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", + "Generated a single JSONL file with 6585 samples (100 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", + "Generated JSONL file with - 1305 max words, 100 samples - at ./dataset/gen-word-1305-count.jsonl\n", + "Generated a single JSONL file with 499 samples (100 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", + "Generated a single JSONL file with 700 samples (100 token repeat) - 375 max words - at ./dataset/shuffle-word-375-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", + "Generated JSONL file with - 1265 max words, 100 samples - at ./dataset/gen-word-1265-count.jsonl\n", + "Generated a single JSONL file with 253 samples (100 token repeat) - 1215 max words - at ./dataset/shuffle-word-1215-count.jsonl\n", + "Generated a single JSONL file with 298 samples (100 token repeat) - 1145 max words - at ./dataset/shuffle-word-1145-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1075 max words - at ./dataset/shuffle-word-1075-count.jsonl\n", + "Generated JSONL file with - 1255 max words, 100 samples - at ./dataset/gen-word-1255-count.jsonl\n", + "Generated a single JSONL file with 405 samples (100 token repeat) - 605 max words - at ./dataset/shuffle-word-605-count.jsonl\n", + "Generated a single JSONL file with 595 samples (100 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonl\n", + "Generated a single JSONL file with 584 samples (100 token repeat) - 445 max words - at ./dataset/shuffle-word-445-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", + "Generated JSONL file with - 1300 max words, 100 samples - at ./dataset/gen-word-1300-count.jsonl\n", + 
"Generated a single JSONL file with 204 samples (100 token repeat) - 1385 max words - at ./dataset/shuffle-word-1385-count.jsonl\n", + "Generated a single JSONL file with 296 samples (100 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", + "Generated a single JSONL file with 201 samples (100 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", + "Generated JSONL file with - 1440 max words, 100 samples - at ./dataset/gen-word-1440-count.jsonl\n", + "Generated a single JSONL file with 204 samples (100 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", + "Generated a single JSONL file with 299 samples (100 token repeat) - 1165 max words - at ./dataset/shuffle-word-1165-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", + "Generated a single JSONL file with 500 samples (100 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", + "Generated a single JSONL file with 1439 samples (100 token repeat) - 165 max words - at ./dataset/shuffle-word-165-count.jsonl\n", + "Generated a single JSONL file with 931 samples (100 token repeat) - 255 max words - at ./dataset/shuffle-word-255-count.jsonl\n", + "Generated a single JSONL file with 400 samples (100 token repeat) - 635 max words - at ./dataset/shuffle-word-635-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1495 max words - at ./dataset/shuffle-word-1495-count.jsonl\n", + "Generated JSONL file with - 1395 max words, 100 samples - at ./dataset/gen-word-1395-count.jsonl\n", + "Generated a single JSONL file with 586 samples (100 token repeat) - 475 max words - at ./dataset/shuffle-word-475-count.jsonl\n", + "Generated a single JSONL file with 498 samples (100 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", + "Generated a single JSONL file with 1622 samples (100 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", + "Generated a single JSONL file with 204 samples (100 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1045 max words - at ./dataset/shuffle-word-1045-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1425 max words - at ./dataset/shuffle-word-1425-count.jsonl\n", + "Generated a single JSONL file with 299 samples (100 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", + "Generated a single JSONL file with 254 samples (100 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1085 max words - at 
./dataset/shuffle-word-1085-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 995 max words - at ./dataset/shuffle-word-995-count.jsonl\n", + "Generated a single JSONL file with 704 samples (100 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", + "Generated a single JSONL file with 499 samples (100 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonl\n", + "Generated a single JSONL file with 592 samples (100 token repeat) - 485 max words - at ./dataset/shuffle-word-485-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 975 max words - at ./dataset/shuffle-word-975-count.jsonl\n", + "Generated a single JSONL file with 201 samples (100 token repeat) - 1325 max words - at ./dataset/shuffle-word-1325-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 965 max words - at ./dataset/shuffle-word-965-count.jsonl\n", + "Generated a single JSONL file with 588 samples (100 token repeat) - 495 max words - at ./dataset/shuffle-word-495-count.jsonl\n", + "Generated a single JSONL file with 205 samples (100 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", + "Generated a single JSONL file with 257 samples (100 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", + "Generated a single JSONL file with 297 samples (100 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", + "Generated a single JSONL file with 296 samples (100 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", + "Generated a single JSONL file with 703 samples (100 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", + "Generated a single JSONL file with 403 samples (100 token repeat) - 625 max words - at ./dataset/shuffle-word-625-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1095 max words - at ./dataset/shuffle-word-1095-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1445 max words - at ./dataset/shuffle-word-1445-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1485 max words - at ./dataset/shuffle-word-1485-count.jsonl\n", + "Generated a single JSONL file with 297 samples (100 token repeat) - 1105 max words - at ./dataset/shuffle-word-1105-count.jsonl\n", + "Generated a single JSONL file with 246 samples (100 token repeat) - 1205 max words - at ./dataset/shuffle-word-1205-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", + "Generated a single JSONL file with 10630 samples (100 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n", + "Generated a single JSONL file with 300 samples (100 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", + "Generated a single JSONL file with 250 samples (100 token repeat) - 1285 max words - at ./dataset/shuffle-word-1285-count.jsonl\n", + "Generated a single JSONL file with 203 samples (100 token repeat) - 1395 max words - at ./dataset/shuffle-word-1395-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1415 max words - at ./dataset/shuffle-word-1415-count.jsonl\n", + "Generated a 
single JSONL file with 26127 samples (100 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n", + "Generated a single JSONL file with 55948 samples (100 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", + "## Done ##\n", + "total 1011M\n", + "drwxrwxr-x 2 recursal recursal 84K Jan 22 20:29 .\n", + "drwxrwxr-x 5 recursal recursal 4.0K Jan 22 18:29 ..\n", + "-rw-rw-r-- 1 recursal recursal 973K Jan 22 20:29 gen-word-1000-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 994K Jan 22 20:29 gen-word-1005-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 526K Jan 22 20:29 gen-word-100-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 999K Jan 22 20:29 gen-word-1010-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 994K Jan 22 20:29 gen-word-1015-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1004K Jan 22 20:29 gen-word-1020-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 998K Jan 22 20:29 gen-word-1025-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1016K Jan 22 20:29 gen-word-1030-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1017K Jan 22 20:29 gen-word-1035-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1018K Jan 22 20:29 gen-word-1040-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1021K Jan 22 20:29 gen-word-1045-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1050-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1055-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 143K Jan 22 20:29 gen-word-105-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1060-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1065-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1070-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1075-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1080-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1085-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1090-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1095-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 99K Jan 22 20:29 gen-word-10-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1100-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1105-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 143K Jan 22 20:29 gen-word-110-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1110-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1115-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1120-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1125-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1130-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1135-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1140-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1145-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1150-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1155-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 152K Jan 22 20:29 gen-word-115-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.1M Jan 22 20:29 gen-word-1160-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M 
Jan 22 20:29 gen-word-1165-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1170-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1175-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1180-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1185-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1190-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1195-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1200-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1205-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 159K Jan 22 20:29 gen-word-120-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1210-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1215-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1220-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1225-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1230-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1235-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1240-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1245-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1250-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1255-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 166K Jan 22 20:29 gen-word-125-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.2M Jan 22 20:29 gen-word-1260-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1265-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1270-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1275-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1280-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1285-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1290-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1295-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 168K Jan 22 20:29 gen-word-130-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 173K Jan 22 20:29 gen-word-135-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 
gen-word-1360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1385-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1395-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1405-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 178K Jan 22 20:29 gen-word-140-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 189K Jan 22 20:29 gen-word-145-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 192K Jan 22 20:29 gen-word-150-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 201K Jan 22 20:29 gen-word-155-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 122K Jan 22 20:29 gen-word-15-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 205K Jan 22 20:29 gen-word-160-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 216K Jan 22 20:29 gen-word-165-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 216K Jan 22 20:29 gen-word-170-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 225K Jan 22 20:29 gen-word-175-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 229K Jan 22 20:29 gen-word-180-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 234K Jan 22 20:29 gen-word-185-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 251K Jan 22 20:29 gen-word-190-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 251K Jan 22 20:29 gen-word-195-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 252K Jan 22 20:29 gen-word-200-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 207K Jan 22 20:29 gen-word-205-count.jsonl\n", + "-rw-rw-r-- 
1 recursal recursal 149K Jan 22 20:29 gen-word-20-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 215K Jan 22 20:29 gen-word-210-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 218K Jan 22 20:29 gen-word-215-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 222K Jan 22 20:29 gen-word-220-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 229K Jan 22 20:29 gen-word-225-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 236K Jan 22 20:29 gen-word-230-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 242K Jan 22 20:29 gen-word-235-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 251K Jan 22 20:29 gen-word-240-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 246K Jan 22 20:29 gen-word-245-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 256K Jan 22 20:29 gen-word-250-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 262K Jan 22 20:29 gen-word-255-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 171K Jan 22 20:29 gen-word-25-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 262K Jan 22 20:29 gen-word-260-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 267K Jan 22 20:29 gen-word-265-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 277K Jan 22 20:29 gen-word-270-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 276K Jan 22 20:29 gen-word-275-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 275K Jan 22 20:29 gen-word-280-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 289K Jan 22 20:29 gen-word-285-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 290K Jan 22 20:29 gen-word-290-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 296K Jan 22 20:29 gen-word-295-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 307K Jan 22 20:29 gen-word-300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 307K Jan 22 20:29 gen-word-305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 196K Jan 22 20:29 gen-word-30-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 307K Jan 22 20:29 gen-word-310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 322K Jan 22 20:29 gen-word-315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 318K Jan 22 20:29 gen-word-320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 332K Jan 22 20:29 gen-word-325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 328K Jan 22 20:29 gen-word-330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 334K Jan 22 20:29 gen-word-335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 344K Jan 22 20:29 gen-word-340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 349K Jan 22 20:29 gen-word-345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 356K Jan 22 20:29 gen-word-350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 354K Jan 22 20:29 gen-word-355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 219K Jan 22 20:29 gen-word-35-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 351K Jan 22 20:29 gen-word-360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 364K Jan 22 20:29 gen-word-365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 372K Jan 22 20:29 gen-word-370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 375K Jan 22 20:29 gen-word-375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 387K Jan 22 20:29 gen-word-380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 386K Jan 22 20:29 gen-word-385-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 386K Jan 22 20:29 gen-word-390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 399K Jan 22 20:29 gen-word-395-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 395K Jan 22 20:29 gen-word-400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 404K Jan 22 20:29 gen-word-405-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 241K Jan 22 20:29 gen-word-40-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 408K Jan 22 20:29 gen-word-410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 411K Jan 22 20:29 gen-word-415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 420K Jan 22 20:29 gen-word-420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 426K Jan 22 20:29 gen-word-425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 429K Jan 22 20:29 gen-word-430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 431K Jan 22 20:29 gen-word-435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 433K Jan 22 20:29 gen-word-440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 440K Jan 22 20:29 gen-word-445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 449K Jan 22 20:29 gen-word-450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 457K Jan 22 20:29 gen-word-455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 267K Jan 22 20:29 gen-word-45-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 464K Jan 22 20:29 gen-word-460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 456K Jan 22 20:29 gen-word-465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 470K Jan 22 20:29 gen-word-470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 466K Jan 22 20:29 gen-word-475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 480K Jan 22 20:29 gen-word-480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 477K Jan 22 20:29 gen-word-485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 486K Jan 22 20:29 gen-word-490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 483K Jan 22 20:29 gen-word-495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 499K Jan 22 20:29 gen-word-500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 494K Jan 22 20:29 gen-word-505-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 295K Jan 22 20:29 gen-word-50-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 506K Jan 22 20:29 gen-word-510-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 499K Jan 22 20:29 gen-word-515-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 515K Jan 22 20:29 gen-word-520-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 522K Jan 22 20:29 gen-word-525-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 526K Jan 22 20:29 gen-word-530-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 532K Jan 22 20:29 gen-word-535-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 535K Jan 22 20:29 gen-word-540-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 539K Jan 22 20:29 gen-word-545-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 546K Jan 22 20:29 gen-word-550-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 555K Jan 22 20:29 gen-word-555-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 315K Jan 22 20:29 gen-word-55-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 557K Jan 22 20:29 gen-word-560-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 558K Jan 22 20:29 gen-word-565-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 566K Jan 22 20:29 gen-word-570-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 567K Jan 22 20:29 gen-word-575-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 569K Jan 22 20:29 gen-word-580-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 586K Jan 22 20:29 gen-word-585-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 583K Jan 22 20:29 gen-word-590-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 594K Jan 22 20:29 gen-word-595-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 72K Jan 22 20:29 gen-word-5-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 585K Jan 22 20:29 gen-word-600-count.jsonl\n", 
+ "-rw-rw-r-- 1 recursal recursal 608K Jan 22 20:29 gen-word-605-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 346K Jan 22 20:29 gen-word-60-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 596K Jan 22 20:29 gen-word-610-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 612K Jan 22 20:29 gen-word-615-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 610K Jan 22 20:29 gen-word-620-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 623K Jan 22 20:29 gen-word-625-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 628K Jan 22 20:29 gen-word-630-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 631K Jan 22 20:29 gen-word-635-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 636K Jan 22 20:29 gen-word-640-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 633K Jan 22 20:29 gen-word-645-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 644K Jan 22 20:29 gen-word-650-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 639K Jan 22 20:29 gen-word-655-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 369K Jan 22 20:29 gen-word-65-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 651K Jan 22 20:29 gen-word-660-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 656K Jan 22 20:29 gen-word-665-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 656K Jan 22 20:29 gen-word-670-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 674K Jan 22 20:29 gen-word-675-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 681K Jan 22 20:29 gen-word-680-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 680K Jan 22 20:29 gen-word-685-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 678K Jan 22 20:29 gen-word-690-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 686K Jan 22 20:29 gen-word-695-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 697K Jan 22 20:29 gen-word-700-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 705K Jan 22 20:29 gen-word-705-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 393K Jan 22 20:29 gen-word-70-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 697K Jan 22 20:29 gen-word-710-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 709K Jan 22 20:29 gen-word-715-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 717K Jan 22 20:29 gen-word-720-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 707K Jan 22 20:29 gen-word-725-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 720K Jan 22 20:29 gen-word-730-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 734K Jan 22 20:29 gen-word-735-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 727K Jan 22 20:29 gen-word-740-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 735K Jan 22 20:29 gen-word-745-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 737K Jan 22 20:29 gen-word-750-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 748K Jan 22 20:29 gen-word-755-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 411K Jan 22 20:29 gen-word-75-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 747K Jan 22 20:29 gen-word-760-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 745K Jan 22 20:29 gen-word-765-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 752K Jan 22 20:29 gen-word-770-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 754K Jan 22 20:29 gen-word-775-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 771K Jan 22 20:29 gen-word-780-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 765K Jan 22 20:29 gen-word-785-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 776K Jan 22 20:29 gen-word-790-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 782K Jan 22 20:29 gen-word-795-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 792K Jan 22 20:29 
gen-word-800-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 796K Jan 22 20:29 gen-word-805-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 441K Jan 22 20:29 gen-word-80-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 801K Jan 22 20:29 gen-word-810-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 805K Jan 22 20:29 gen-word-815-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 820K Jan 22 20:29 gen-word-820-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 814K Jan 22 20:29 gen-word-825-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 811K Jan 22 20:29 gen-word-830-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 820K Jan 22 20:29 gen-word-835-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 826K Jan 22 20:29 gen-word-840-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 838K Jan 22 20:29 gen-word-845-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 836K Jan 22 20:29 gen-word-850-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 840K Jan 22 20:29 gen-word-855-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 462K Jan 22 20:29 gen-word-85-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 855K Jan 22 20:29 gen-word-860-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 858K Jan 22 20:29 gen-word-865-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 854K Jan 22 20:29 gen-word-870-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 864K Jan 22 20:29 gen-word-875-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 867K Jan 22 20:29 gen-word-880-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 865K Jan 22 20:29 gen-word-885-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 866K Jan 22 20:29 gen-word-890-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 877K Jan 22 20:29 gen-word-895-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 883K Jan 22 20:29 gen-word-900-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 896K Jan 22 20:29 gen-word-905-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 489K Jan 22 20:29 gen-word-90-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 892K Jan 22 20:29 gen-word-910-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 897K Jan 22 20:29 gen-word-915-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 907K Jan 22 20:29 gen-word-920-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 907K Jan 22 20:29 gen-word-925-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 912K Jan 22 20:29 gen-word-930-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 927K Jan 22 20:29 gen-word-935-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 936K Jan 22 20:29 gen-word-940-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 923K Jan 22 20:29 gen-word-945-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 944K Jan 22 20:29 gen-word-950-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 933K Jan 22 20:29 gen-word-955-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 509K Jan 22 20:29 gen-word-95-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 944K Jan 22 20:29 gen-word-960-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 947K Jan 22 20:29 gen-word-965-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 955K Jan 22 20:29 gen-word-970-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 964K Jan 22 20:29 gen-word-975-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 967K Jan 22 20:29 gen-word-980-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 966K Jan 22 20:29 gen-word-985-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 966K Jan 22 20:29 gen-word-990-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 974K Jan 22 20:29 gen-word-995-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 
20:29 shuffle-word-1000-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1005-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-100-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1010-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1015-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1020-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1025-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1030-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1035-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1040-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1045-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1050-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1055-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-105-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1060-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1065-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1070-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1075-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1080-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1085-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1090-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1095-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 20:29 shuffle-word-10-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1100-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1105-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-110-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1110-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1115-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1120-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1125-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1130-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1135-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1140-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1145-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1150-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1155-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-115-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1160-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1165-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1170-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1175-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 
shuffle-word-1180-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1185-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1190-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1195-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1200-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1205-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-120-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1210-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1215-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1220-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1225-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1230-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1235-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1240-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1245-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1250-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1255-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-125-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1260-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1265-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1270-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1275-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1280-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1285-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1290-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1295-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-130-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-135-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 
shuffle-word-1365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1385-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1395-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1405-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-140-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-145-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-150-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-155-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 20:29 shuffle-word-15-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-160-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-165-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-170-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-175-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-180-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-185-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-190-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-195-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-200-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-205-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 20:29 shuffle-word-20-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-210-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-215-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-220-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-225-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-230-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-235-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-240-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-245-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-250-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-255-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 20:29 shuffle-word-25-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-260-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-265-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-270-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-275-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-280-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-285-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-290-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-295-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 20:29 shuffle-word-30-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 20:29 shuffle-word-35-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-385-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 
shuffle-word-390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-395-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-405-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 20:29 shuffle-word-40-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 20:29 shuffle-word-45-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-505-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 20:29 shuffle-word-50-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-510-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-515-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-520-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-525-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-530-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-535-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-540-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-545-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-550-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-555-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 20:29 shuffle-word-55-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-560-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-565-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-570-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-575-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M 
Jan 22 20:29 shuffle-word-580-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-585-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-590-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-595-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 8.0M Jan 22 20:29 shuffle-word-5-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-600-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-605-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 20:29 shuffle-word-60-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-610-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-615-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-620-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-625-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-630-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-635-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-640-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-645-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-650-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-655-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 20:29 shuffle-word-65-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-660-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-665-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-670-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-675-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-680-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-685-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-690-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-695-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-700-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-705-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 20:29 shuffle-word-70-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-710-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-715-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-720-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-725-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-730-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-735-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-740-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-745-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-750-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-755-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 20:29 shuffle-word-75-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-760-count.jsonl\n", + "-rw-rw-r-- 1 recursal 
recursal 2.6M Jan 22 20:29 shuffle-word-765-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-770-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-775-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-780-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-785-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-790-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-795-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-800-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-805-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 20:29 shuffle-word-80-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-810-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-815-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-820-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-825-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-830-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-835-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-840-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-845-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-850-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-855-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-85-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-860-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-865-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-870-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-875-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-880-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-885-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-890-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-895-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-900-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-905-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-90-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-910-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-915-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-920-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-925-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-930-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-935-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-940-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-945-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-950-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-955-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-95-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-960-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-965-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-970-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-975-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-980-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-985-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-990-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-995-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 141K Jan 22 20:29 word-2-count.jsonl\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# Training set for < 50 words\n", + "# This is used to fill up as much blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 300 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 4 1000 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 500 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 50+ - 400 words dataset\n", + "# \n", + "for i in {105..200..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 125 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 50+ - 400 words dataset\n", + "# \n", + "for i in {205..1500..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resolving data files: 100%|███████████████| 601/601 [00:00<00:00, 370647.95it/s]\n", + "Filter (num_proc=160): 100%|██| 372801/372801 [00:03<00:00, 93704.48 examples/s]\n", + "Map (num_proc=160): 100%|████| 363015/363015 [00:02<00:00, 127526.30 examples/s]\n", + "Map (num_proc=160): 100%|█████| 363015/363015 [00:07<00:00, 46066.68 examples/s]\n", + "Map (num_proc=160): 100%|███████| 87900/87900 [00:03<00:00, 27106.01 examples/s]\n", + "Saving the dataset (2/2 shards): 100%|█| 87900/87900 [00:01<00:00, 82134.79 exam\n", + "Saving the dataset (1/1 shards): 100%|█| 364/364 [00:00<00:00, 13312.35 examples\n" + ] + } + ], + "source": [ + "# Lets pre tokenize the requried dataset\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " 
python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-1-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-1-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finetune 1 : The actual tune!" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-22 20:30:14,781] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-1-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8x4090] RWKV-v5-1B5-World - Mem-Finetune-1 (bs=256, train-ctx=2048, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=2048'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-1-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8x4090] RWKV-v5-1B5-World - Mem-Finetune-1 (bs=256, train-ctx=2048, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=2048'].\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 2435230032\n", + "Seed set to 2435230032\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. 
Please set your precision to bf16-mixed instead!\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 256\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 4\n", + " - effective_batch_size: 256\n", + "\n", + "[rank: 0] Seed set to 2435230032\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-22 20:30:34,516] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-22 20:30:34,555] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-22 20:30:34,573] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-22 20:30:34,643] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-22 20:30:34,665] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-22 20:30:34,667] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-22 20:30:34,779] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[rank: 6] Seed set to 2435230032\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 3] Seed set to 2435230032\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 4] Seed set to 2435230032\n", + "[rank: 7] Seed set to 2435230032\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 5] Seed set to 2435230032\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 2] Seed set to 2435230032\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 1] Seed set to 2435230032\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "ninja: no work to do.\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 1] Seed set to 2435230032\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 2] Seed set to 2435230032\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 3] Seed set to 2435230032\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 6] Seed set to 2435230032\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 5] Seed set to 2435230032\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 7] Seed set to 2435230032\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 4] Seed set to 2435230032\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240122_203109-hhwmn520\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8x4090] RWKV-v5-1B5-World - Mem-Finetune-1 (bs=256, train-ctx=2048, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/hhwmn520\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.07379484176635742 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.1038506031036377 seconds\n", + "Time to load fused_adam op: 0.10417509078979492 seconds\n", + "Time to load fused_adam op: 0.10447382926940918 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10385513305664062 seconds\n", + "Time to load fused_adam op: 0.10325026512145996 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10282731056213379 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10386180877685547 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "Epoch 0: 7%| | 100/1374 [04:50<1:01:41, 0.34it/s, v_num=n520, train/loss=0.05/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|█| 1374/1374 [1:01:19<00:00, 0.37it/s, v_num=n520, train/loss=0.1\n", + "Validation: | | 0/? [00:00 [verbose/csv-file-path] [from_token_count] [to_token_count]") + sys.exit(1) + + # Verbose mode + verbose = False + csv_file_path = None + if len(sys.argv) >= 3: + if sys.argv[2] == "verbose": + verbose = True + elif sys.argv[2] == "none": + csv_file_path = None + else: + csv_file_path = sys.argv[2] + + from src.model import SimpleRWKV + model_path = sys.argv[1] + model = SimpleRWKV(model_path, device="cuda") + + # The evaluation size range + MAX_TOKENS = 1000 + + # Get the cursed " on" token (this happens only in some models) + on_token = model.encode(" on")[0] + markdown_token = model.encode("```")[0] + newline_token = model.encode("\n")[0] + + # Pipeline args to use + token_ban = [on_token] # ban the generation of some tokens + + # Read the test word list, taken from ./eval_word_list.txt + with open(os.path.join(SCRIPT_DIR,'./eval_word_list.txt'), 'r') as f: + test_word_list = f.read() + + # Open the CSV file to write into + if csv_file_path != None: + # Ensure parent dir is in place + csv_file_dir = os.path.dirname(csv_file_path) + if not os.path.exists(csv_file_dir): + os.makedirs(csv_file_dir) + + # Open the CSV file + csv_file_handle = await aiofiles.open(csv_file_path, 'w', encoding="utf-8", newline="") + csv_writer = AsyncWriter(csv_file_handle, dialect="unix") + + # Write the header + await csv_writer.writerow([ + 'eval_token_count', 'token_idx', 'matched', + 'top_token_str', 'top_token_percentage', + 'eval_token_str', 'eval_token_pos', 'eval_token_percentage', + 'is_random_baseline' + ]) + else: + csv_writer = None + + # Convert it to tokens + test_word_tokens = model.encode(test_word_list) + + # Prompt template prefix to use + prompt_prefix = "Instruction: Repeat this text exactly as it is\n\nInput:\n```\n" + prompt_suffix = "\n```\n\n" + reply_prefix = "Response:\n```\n" + reply_suffix = "\n```\n" + + # Process the prompt prefix + prompt_prefix_logits, prompt_prefix_state = model.forward(model.encode(prompt_prefix), None) + mid_segment_tokens = model.encode(prompt_suffix+reply_prefix) + + # Function used to get words with the given token count + def get_words_tokens_with_token_count(token_count): + target_tokens = test_word_tokens[:token_count] + target_words = model.decode(target_tokens) + + # Normalize to lowercase + target_words = target_words.lower() + return target_words + + # Function for validating the model at a specific token count + async def validate_model(token_count, withoutInstructAndInput=False): + # Start the performance timer + start_time = time.time() + # print(f"-- Validating model for token count: ", token_count) + + # Get the target tokens + target_tokens = test_word_tokens[:token_count] + + # Validate that the token list matches the target token count (throw an error if not) + if len(target_tokens) != token_count: + raise Exception("Target tokens count mismatch - target is probably larger than the eval word list") + + logits = None + state = None + + # We validate with the instruct and input prompt template by default.
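+        # (i.e. the "Instruction: Repeat this text exactly as it is" wrapper built above)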
# Having the option to disable this helps us get a randomized baseline score + if withoutInstructAndInput == True: + # Because we actually need a logit to start with, we compromise with a new line at minimum + first_logits, state = model.forward([newline_token], state) + else: + # Clone the state + state = copy.deepcopy(prompt_prefix_state) + + # Compute the document to memorize + logits, state = model.forward(target_tokens, state) + + # Compute the mid segment + first_logits, state = model.forward(mid_segment_tokens, state) + + # Score counter + matched_tokens = 0 + + # CSV rows to write + csv_rows = [] + + # Common validation function + # ---- + + async def validateToken(sorted_probs, sorted_indices, softmax_arr, tokenIdx, match_count = 0): + # Get the top token info + top_token = sorted_indices[0].item() + top_prob = sorted_probs[0].item() + + # Check if the token matches, and score it + target = target_tokens[tokenIdx] + if top_token == target: + match_count += 1 + + # Find the target token position + if verbose or csv_writer != None: + target_prob = softmax_arr[target].item() + target_pos = 0 + for i in range(len(sorted_indices)): + if sorted_indices[i].item() == target: + target_pos = i + break + + # Get top_token_str & target_token_str, but because an error can happen, we catch it + try: + top_token_str = model.decode([top_token]).encode('unicode_escape').decode('utf-8') + except: + top_token_str = "" + try: + target_token_str = model.decode([target]).encode('unicode_escape').decode('utf-8') + except: + target_token_str = "" + + # Print the results, for verbose + if verbose: + if top_token == target: + print(f' - token {tokenIdx} (hit) : "{top_token_str}" ({top_prob*100:.2f}%)') + else: + print(f' - token {tokenIdx} (miss): "{top_token_str}" ({top_prob*100:.2f}%) | "{target_token_str}" pos={target_pos} ({target_prob*100:.2f}%)') + + # Log it to CSV file if enabled + if csv_writer != None: + # We need to encode the strings safely (escape special characters, new lines, etc) + csv_rows.append([ + token_count, tokenIdx, top_token == target, + top_token_str, top_prob, + target_token_str, target_pos, target_prob, + withoutInstructAndInput == True + ]) + + # Return matched count + return match_count + + # Let's validate the first logits + # ---- + + # Apply token ban + for n in token_ban: + first_logits[n] = -float('inf') + + # Validate the first token (special case) + first_logits = torch.softmax(first_logits, dim=-1) + sorted_probs, sorted_indices = torch.sort(first_logits, descending=True, stable=True, dim=-1) + matched_tokens = await validateToken(sorted_probs, sorted_indices, first_logits, 0) + + # Print the timing till now + # print(f"-- Finished validating first token ({time.time() - start_time:.2f}s)") + + # Loop through the target tokens in sets of 1000 + # ---- + for subsetPos in range(0, token_count, 1000): + + # Get the subset, and forward it + token_subset = target_tokens[subsetPos:subsetPos+1000] + subset_logits, state = model.forward(token_subset, state, all_logits=True) + + # Apply the token ban + for n in token_ban: + subset_logits[:,n] = -float('inf') + + # Sort via GPU + subset_logits = subset_logits.to('cuda') + subset_logits = torch.softmax(subset_logits, dim=-1) + sorted_probs, sorted_indices = torch.sort(subset_logits, descending=True, stable=True, dim=-1) + + # Convert back to CPU land + sorted_probs = sorted_probs.to('cpu') + sorted_indices = sorted_indices.to('cpu') + + # Loop through the subset + for i in range(len(token_subset)): + pos = i+1+subsetPos + if pos <= 
len(target_tokens)-1:
+                    matched_tokens = await validateToken(sorted_probs[i], sorted_indices[i], subset_logits[i], pos, matched_tokens)
+
+            # Garbage collect
+            gc.collect()
+            torch.cuda.empty_cache()
+
+        # # Forward all the target tokens in a single pass
+        # # ---
+        # all_logits, state = model.forward(target_tokens, state, all_logits=True)
+        # # print(f"-- Finished multi-token forward pass ({time.time() - start_time:.2f}s)")
+
+        # # Extract the sorted values, and cast them to CPU
+        # # ---
+        # # Apply token ban
+        # for n in token_ban:
+        #     all_logits[:,n] = -float('inf')
+
+        # # GPU based sort
+        # all_logits = all_logits.to('cuda')
+        # all_logits = torch.softmax(all_logits, dim=-1)
+        # sorted_probs, sorted_indices = torch.sort(all_logits, descending=True, stable=True, dim=-1)
+
+        # # Convert back to CPU land
+        # sorted_probs = sorted_probs.to('cpu')
+        # sorted_indices = sorted_indices.to('cpu')
+
+        # # print(f"-- Finished sorting logits ({time.time() - start_time:.2f}s)")
+
+        # # Let's evaluate the logits, and check if they match one by one
+        # for i in range(len(target_tokens)-1):
+        #     # Validate the token
+        #     matched_tokens = await validateToken(sorted_probs[i], sorted_indices[i], all_logits[i], i+1, matched_tokens)
+
+        # print(f"-- Finished token matching ({time.time() - start_time:.2f}s)")
+
+        # Write the CSV rows
+        if csv_writer != None:
+            await csv_writer.writerows(csv_rows)
+
+        # print(f"-- Finished CSV write ({time.time() - start_time:.2f}s)")
+
+        # Percentage token match
+        matched_percentage = matched_tokens / token_count * 100.0
+
+        # Print the results
+        if withoutInstructAndInput == False:
+            print(f'## Model validation for {token_count} tokens : {matched_percentage}% similarity, with {matched_tokens} matched token, and {token_count - matched_tokens} token mismatch')
+        else:
+            print(f"## Finished baseline model to eval output predictive matching (aka 0 memory?), for {MAX_TOKENS} tokens")
+
+        if verbose:
+            print("## ------------------ ")
+
+        # # Print more info if there are differences
+        # if(char_diff_count > 0):
+        #     print("--- target ---")
+        #     print(target_words)
+        #     print("--- completion ---")
+        #     print(completion)
+        #     print("------------------")
+
+    # Print the start of model validation
+    print("###")
+    print("### Model validation start ###")
+    print("###")
+
+    # Check if it's an extended eval set
+    if len(sys.argv) == 4:
+        EXTENDED_EVAL = True
+
+        # Get the int value from sys.argv[3]
+        MAX_TOKENS = int(sys.argv[3])
+        MIN_TOKENS = 1100
+    elif len(sys.argv) == 5:
+        EXTENDED_EVAL = True
+
+        # Get the int value from sys.argv[3]/[4]
+        MIN_TOKENS = int(sys.argv[3])
+        MAX_TOKENS = int(sys.argv[4])
+    else:
+        EXTENDED_EVAL = False
+
+    # Validate the model at different token counts
+    if EXTENDED_EVAL == False:
+        # We validate in increments of 5, from 5 to 150
+        for i in range(5, 150, 5):
+            await validate_model(i)
+
+        # We validate in increments of 10 from 150 to 300
+        for i in range(150, 300, 10):
+            await validate_model(i)
+
+        # We validate in increments of 25 from 300 to 700
+        for i in range(300, 700, 25):
+            await validate_model(i)
+
+        # We validate in increments of 50 from 700 to MAX_TOKENS (inclusive)
+        for i in range(700, MAX_TOKENS+1, 50):
+            await validate_model(i)
+
+        # Let's do the baseline
+        if csv_file_path != None:
+            await validate_model(MAX_TOKENS, withoutInstructAndInput=True)
+
+    else:
+        # We validate in increments of 100 from MIN_TOKENS to MAX_TOKENS (inclusive)
+        # when the range goes past the 8k mark, otherwise in increments of 50
+        if MAX_TOKENS > 8000:
+            for i in range(MIN_TOKENS, MAX_TOKENS+1, 100):
+                await validate_model(i)
+        else:
+            for i in 
range(MIN_TOKENS, MAX_TOKENS+1, 50): + await validate_model(i) + + # Print the end of model validation + print("###") + print("### Model validation end ###") + print("###") + +if __name__ == '__main__': + asyncio.run(main_function()) \ No newline at end of file diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml new file mode 100644 index 00000000..dca4c7ce --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml @@ -0,0 +1,410 @@ +# lightning.pytorch==2.0.2 +seed_everything: true +trainer: + # Configure the number of GPU, avaliable on your machine + accelerator: gpu + devices: auto + num_nodes: 1 + + # + # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload` + # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful + # for training LoRA on large models on a single GPU. + # + # In general you would want to use the following: + # + # - deepspeed_stage_1 : Each of your GPU has too much vram, and you do not know what to do + # + # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple gpu each with sufficient vram + # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu + # + # - deepspeed_stage_3 : Split up the model across multiple gpu, useful for large models, at a performance cost + # - deepspeed_stage_3_offload : Additional offloading, for even greater performance cost + # + # For more details see: + # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2 + # + strategy: deepspeed_stage_1 + + # Floating point precision for the model, because RWKV is built FOR bf16 + # you should pretty much never change this setting + precision: bf16 + + # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section + # --- + logger: + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + name: 'Stage-1-memory-finetune-1 (bs=256, train-ctx=512)' + # name: 'Echo-B-1B4 Foundation' + project: 'RWKV-Memory-Experiment' + tags: ['RWKV', 'memory-exp'] + id: null + save_dir: . + version: null + offline: false + dir: null + anonymous: null + log_model: false + experiment: null + prefix: '' + checkpoint_name: null + job_type: null + config: null + entity: null + reinit: null + group: null + notes: null + magic: null + config_exclude_keys: null + config_include_keys: null + mode: null + allow_val_change: null + resume: null + force: null + tensorboard: null + sync_tensorboard: null + monitor_gym: null + save_code: null + settings: null + + # Checkpoint settings for the training process + callbacks: + class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + # Configure this to the path you want to save your checkpoints to + # note that a subdir will be created with the name `epoch=x-step=y.ckpt` + # + # to convert a checkpoint to a model, you can use the + # `python3 export_checkpoint.py ` script, + # which will create a `rwkv_model.pth` in the checkpoint directory. 
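+      #
+      # For example (an illustrative invocation only - the exact checkpoint folder
+      # name depends on the run, and the argument shape is an assumption, not a
+      # documented CLI contract):
+      #
+      #   python3 export_checkpoint.py ../checkpoint/v5-exp/memory-test/stage-1-memory-finetune/last.ckpt/
+      #
+      # which would leave the converted `rwkv_model.pth` beside the checkpoint files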
+      #
+      # Do not use the `zero_to_fp32.py` script as that will have export format issues
+      dirpath: ../checkpoint/v5-exp/memory-test/stage-1-memory-finetune/
+      filename: null
+
+      # Save the top/last K checkpoints
+      save_top_k: 2
+      # Choose by the most recent checkpoints (time based)
+      monitor: 'step'
+      mode: max
+
+      # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt'
+      # useful to simplify checkpoint resume scripts, at a price of disk performance
+      save_last: true
+
+      # DO NOT set this as true, as the model weights exported will have format issues
+      # export as a checkpoint instead, and use the `export_checkpoint.py` script to convert it to a model
+      save_weights_only: false
+
+      # How frequently you want to save a checkpoint, in steps.
+      # This will happen every X data samples, where X = every_n_train_steps * accumulate_grad_batches
+      #
+      # In general you will want to avoid putting a low number (especially if accumulate_grad_batches <= 100)
+      # as the checkpoint process will pause all the gpu training for some time, slowing down the overall process
+      # However you do not want to configure too high a number either, where you will lose too much progress if the training crashes
+      every_n_train_steps: 25
+      every_n_epochs: null
+      save_on_train_epoch_end: true
+      train_time_interval: null
+
+      # Other settings, you can probably leave alone
+      verbose: false
+      auto_insert_metric_name: true
+
+  ########################################
+  ## Training run parameter settings
+  ########################################
+
+  # Generally what you want to configure is the maximum number of epochs
+  # Leave it as -1, and it will keep going forever till interrupted
+  # Or set it as a number, and it will stop after that number of epochs
+  max_epochs: 1
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  max_time: null
+
+  # Number of datasamples to train for each step, a data sample is considered
+  # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step"
+  #
+  # This decides the number of datasamples to learn together from, before backpropagating
+  # any weight changes at the end of the batch.
+  #
+  # Recommended to be a big enough number (like 128/256) that it prevents the training
+  # loss from fluctuating in the process. But not too big a number, where the increased
+  # GPU vRAM / offloaded RAM usage will cause the training to crash.
+  #
+  # You are also recommended to configure this to a large enough number to fully utilize
+  # your GPU processing time %, and avoid idle time for the GPU between batches
+  target_batch_size: 256
+
+  # # Number of datasamples to accumulate before backpropagating, per GPU
+  # # this can't be used with target_batch_size.
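+  # # (Rough relationship sketch, assuming one datasample per GPU per substep:
+  # #  target_batch_size ~= num_nodes * num_gpus * accumulate_grad_batches,
+  # #  so target_batch_size: 256 on a single 8-GPU node works out to roughly
+  # #  32 gradient accumulation steps per GPU)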
+  # accumulate_grad_batches: -1
+
+  # Various other settings, you probably want to leave alone
+  fast_dev_run: false
+  limit_train_batches: null
+  limit_val_batches: null
+  limit_test_batches: null
+  limit_predict_batches: null
+  overfit_batches: 0.0
+  val_check_interval: null
+  check_val_every_n_epoch: 1
+  num_sanity_val_steps: 0
+  log_every_n_steps: 1
+  enable_checkpointing: null
+  enable_progress_bar: null
+  enable_model_summary: null
+  gradient_clip_val: 1.0
+  gradient_clip_algorithm: null
+  deterministic: null
+  benchmark: null
+  inference_mode: true
+  use_distributed_sampler: true
+  profiler: null
+  detect_anomaly: false
+  barebones: false
+  plugins: null
+  sync_batchnorm: false
+  reload_dataloaders_every_n_epochs: 0
+  default_root_dir: null
+
+########################################
+## Training model settings
+########################################
+model:
+  # Model to start the finetune/training process from
+  load_model: ../model/Echo-B-1B4-Stage2.pth
+
+  # Context length to use for the training process
+  # the larger the number (and batch size) the larger the vram usage
+  #
+  # Note that if the datasample context length is larger than the ctx_len
+  # its training process would be split into ctx_len sized chunks.
+  #
+  # This allows the training of extremely large context lengths (eg. 100k),
+  # without eating up too much vram, by keeping the training context length
+  # to a reasonable number suitable to the current GPU setup
+  ctx_len: 2048
+
+  # Learning rate of the training process
+  # ---
+  # Initial learning rate of the process
+  lr_init: 8e-4
+  # Final learning rate after the learning rate period
+  # the learning rate will stay at the final value from then onwards
+  #
+  # NOTE: lr_final / lr_period does not work with warmup_steps
+  # and will be ignored (or replaced) with the warmup_steps logic instead
+  lr_final: 4e-4
+
+  # Number of epochs to reduce the learning rate from lr_init to lr_final
+  # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards)
+  # 0 means lr_final will apply immediately
+  # -1 means we take the current max_step / max_epoch as the period
+  lr_period: 1
+  # lr_period type if it's set, defaults to epoch
+  lr_period_type: epoch
+
+  # Adam optimizer settings
+  # You probably want to leave this alone, unless you know what you are doing
+  beta1: 0.9
+  beta2: 0.99
+  adam_eps: 1.0e-08
+  weight_decay: 0.01
+
+  # Experimental cutoff settings
+  # ---
+  # Data samples would be cut down to the respective max ctx_len_cutoffs
+  # values if they are larger than ctx_len. If the data sample is larger than
+  # the largest len_cutoff, the remaining data will be discarded
+  #
+  # Leave it as a blank array to disable the feature
+  ctx_len_cutoffs: []
+  # Experimental settings, number of tokens to skip in the data sample
+  # prefix, for the respective cutoff length. Used to speed up the process
+  #
+  # Leave it as a blank array to disable the feature
+  ctx_len_warmup_steps: []
+
+  # torch.set_float32_matmul_precision, used to optimize operations with tensor cores
+  # this should be set as null, for non-cuda core GPUs
+  torch_set_float32_matmul_precision: 'high'
+  # torch_set_float32_matmul_precision: null
+
+  # We limit bptt_learning_range to 1, to ensure high throughput within a multi-gpu
+  # setup (by skipping some synchronization code). Additionally, bptt learning should
+  # not trigger anyway, as the data sample should be within ctx size 99% of the time
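+  #
+  # (As a sketch of what this controls - a summary based on the comments in this
+  #  file, not a formal spec: with bptt_learning enabled, a sample longer than
+  #  ctx_len is trained in ctx_len sized chunks, and bptt_learning_range caps how
+  #  many chunks gradients are carried back across; 1 keeps each chunk's backprop
+  #  local, while -1 would backprop across the full sample at a synchronization cost)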
+  bptt_learning: true
+  bptt_learning_range: 1
+
+  # various other settings you probably should leave alone
+  grad_cp: true
+  warmup_steps: -1
+  layerwise_lr: true
+  dim_att: null
+  dim_ffn: null
+
+  # ----------------------------
+  # Selective loss training
+  # ----------------------------
+
+  # Skip tokens that are already learnt, and are below the target loss threshold
+  token_loss_threshold: 0.01
+  # Perform token based dropout at random, at the target % rate
+  # token_dropout_rate: 0.0
+
+data:
+  # Skip the datapath setup
+  # ignored if using the preload_datapath.py
+  skip_datapath_setup: True
+
+  # dataset_path for the prebuilt dataset, using HF `load_from_disk()`
+  #
+  # Use this if you have built your own dataset and saved it with `save_to_disk()`
+  # with source left as null. Otherwise configure this to a directory in which the
+  # dataset will be built and tokenized by the huggingface dataset process.
+  data_path: ../datapath/v5-exp/memory-test/stage-1/
+
+  # Otherwise provide the source path, which is used as the huggingface dataset path
+  # this will be used to populate the dataset_path
+  #
+  # Use either of the following
+  # - hugging face dataset
+  # - Directory path to a directory containing dataset files
+  # - Path to a single dataset file
+  # - hugging face dataset mode (ie: text,csv,etc - use data_dir to configure the path)
+  # - null
+  #
+  # If source is disabled, all other params, except data_path, are ignored
+  source: json
+  # source: text
+  # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt
+
+  # Use data_dir, if you are using source=text/json/etc
+  # this should be relative to the trainer script path
+  source_data_dir: ../notebook/rwkv-x-exp/v5-exp/memory-test/dataset
+
+  # After loading the dataset, split out test data used for validation
+  # This process is skipped if the dataset includes a test split
+  # This process is skipped if set to zero
+  test_split: 0.01
+  test_split_shuffle: true
+
+  # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer
+  # If using a custom tokenizer, provide the tokenizer file path
+  # ---
+  tokenizer: world
+
+  # Minimum / Maximum token size of the dataset to use
+  # useful for filtering out small noisy data samples from large datasets
+  # (eg. removal of small articles of less than 512 tokens from wikipedia)
+  #
+  # This is ignored, if set to -1
+  min_token_size: -1
+  max_token_size: 2048
+
+  # Rechunking of the text dataset, this is done only when source is set as 'text'
+  # and will merge the various sentences into larger chunks, up to the target size
+  #
+  # Defaults to 4096
+  #
+  # This is ignored, if source is not set as text
+  # This is ignored, if set to zero
+  # ---
+  # text_rechunk_size: 2048
+
+  # Apply text rechunk to the dataset, even if it's not a 'text' source
+  # This is done only after dataset filtering, and if source is not 'text'
+  # ---
+  # text_rechunk_force: false
+
+  # Custom text column to use, useful for datasets with alternative training column labels
+  # This is checked before multi column merging, default is null (disabled)
+  # eg: 'code'
+  # ---
+  # custom_text_key: 'code'
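+  # (For illustration of the multi column merge configured below - this is an
+  #  assumed record shape, not a quote from the dataset: a JSONL row such as
+  #  {"instruction": "...", "input": "...", "output": "..."} is merged into one
+  #  training sample, and loss is masked for the sections whose
+  #  multi_column_train_mask entry is false)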
+  # Multi Column merging process, the default setting is used to support and merge
+  # "instruction", "input", "output" datasets. To disable, set multi_column_keys to []
+  #
+  # A minimum of 2 columns, with non-empty data, is required for the merge to occur
+  # If no match is found, this will fall back to the default prompt/completion or text column,
+  # or throw an error if the default fallback is not found
+  # ---
+  # multi_column_keys: ['input_prefix', 'input', 'output_prefix', 'output', 'closing']
+  # multi_column_prefix: ['', '', '', '', '']
+  # multi_column_train_mask: [true, false, true, true, true]
+  # multi_column_separator: ''
+
+  # If processing prompt/completion jsonl pairs, the prompt is masked by default
+  # use this flag to disable this default behaviour
+  # ---
+  # disable_prompt_mask: false
+
+  # ----------------------------
+  # Selective loss training
+  # ----------------------------
+
+  # Prefix token masking
+  #
+  # The rationale behind this is that the first X tokens should not be "backpropped"
+  # for any new training record, as it's unfair to expect the model (or a human) to make
+  # any reasonable guesses at that stage. As such, this is used to "mask" the first X tokens
+  # from the loss calculation, so they are not backpropped.
+  data_prefix_skip_mask: 0
+
+  # Additional source dataset params, used to grab subsets of the dataset
+  # ---
+  # source_dataset_params:
+  #   language: en
+
+  # ----------------------------
+  # Dataset packing support
+  # Recommended to be used with mixed document size finetuning
+  # For foundation model "from scratch" training, rechunking is typically used instead
+  # ----------------------------
+
+  # Boolean flag to enable / disable dataset packing
+  packing_enable: True
+
+  # Used to ensure all training samples within this batch size are the same length
+  # Ideally this should align exactly with your real "batch size"
+  #
+  # Uses `8 * (3 * 4 * 5 * 6 * 7) = 20160` by default, as it should align across
+  # a large number of batch size combinations. This helps reduce the amount of
+  # misaligned batches, and thus reduces the amount of wasted training time.
+  packing_batchsize: 256
+
+  # Chunking size to align within each batch, this ideally should be equal to
+  # the training context length used.
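+  #
+  # eg. with the `ctx_len: 2048` configured in the model section above, a
+  # packing_chunksize of 2048 keeps each packed sample aligned to a single
+  # training chunk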
+ packing_chunksize: 2048 + + # Minimum size to pack up to, this should be a multiple of packing_chunksize + # defautls to -1, which equals to packing_chunksize + packing_min_ctx_len: -1 + + # Pack the data sequentially if possible, in accordance to the dataset sequence + # this can be used together with sort_by_length + packing_in_sequence: true + + # ---------------------------- + # Sort before packing, and reverse before saving + # ---------------------------- + + # Sort the dataset by length, useful to reduce gpu waiting time (also useful for RWKV long context coherence) + sort_by_length: true + sort_asc: false # Sort in ascending order, true = shortest first, false = longest first + + # Reverse the training dataset order before saving, this is useful for, + # optimizing dataset packing process, when using packing_in_sequence + # and sort_by_length desc order together + reverse_train_dataset_before_save: True + +# Path to the current checkpoint to continue training from +# Enable this to the last checkpoint after the first run +# (if it crash and you want to resume) +# ckpt_path: ../checkpoint/xyz/last.ckpt +ckpt_path: null From 325c53a5ad415362ed582e403648f108ac774511 Mon Sep 17 00:00:00 2001 From: "Eugene Cheah (picocreator)" Date: Tue, 23 Jan 2024 01:27:22 +0000 Subject: [PATCH 19/23] wip iteration --- .../memory-test/World-1B5-mem-finetune.ipynb | 7596 +++++++++++------ .../v5-exp/memory-test/stage-1-tune.yaml | 11 +- .../v5-exp/memory-test/stage-2-tune.yaml | 411 + 3 files changed, 5508 insertions(+), 2510 deletions(-) create mode 100644 notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb index 1420c5e2..c7d9aa6f 100644 --- a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -112,7 +112,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Finetune 1 : Dataset preperation\n", + "## Finetune 1 (0 -> 2k) : Dataset preperation\n", "\n", "Stage 1, handles total context size of 2048. Meaning it will be tuned for memory task of 1 to approximately 1024 tokens of size." ] @@ -1454,7 +1454,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Finetune 1 : The actual tune!" + "## Finetune 1 (0 -> 2k) : The actual tune!" 
] }, { @@ -1886,7 +1886,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -2044,7 +2044,15 @@ "## Model validation for 2600 tokens : 22.692307692307693% similarity, with 590 matched token, and 2010 token mismatch\n", "## Model validation for 2650 tokens : 21.132075471698116% similarity, with 560 matched token, and 2090 token mismatch\n", "## Model validation for 2700 tokens : 20.14814814814815% similarity, with 544 matched token, and 2156 token mismatch\n", - "## Model validation for 2750 tokens : 18.654545454545453% similarity, with 513 matched token, and 2237 token mismatch\n" + "## Model validation for 2750 tokens : 18.654545454545453% similarity, with 513 matched token, and 2237 token mismatch\n", + "## Model validation for 2800 tokens : 17.892857142857142% similarity, with 501 matched token, and 2299 token mismatch\n", + "## Model validation for 2850 tokens : 17.192982456140353% similarity, with 490 matched token, and 2360 token mismatch\n", + "## Model validation for 2900 tokens : 16.10344827586207% similarity, with 467 matched token, and 2433 token mismatch\n", + "## Model validation for 2950 tokens : 15.050847457627118% similarity, with 444 matched token, and 2506 token mismatch\n", + "## Model validation for 3000 tokens : 13.900000000000002% similarity, with 417 matched token, and 2583 token mismatch\n", + "###\n", + "### Model validation end ###\n", + "###\n" ] } ], @@ -2058,12 +2066,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Finetune 2 - More data" + "## Finetune 2 (2k -> 4k) - More data" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -2071,2537 +2079,4810 @@ "output_type": "stream", "text": [ "## Generating word reptition dataset ##\n", - "Generated JSONL file with - 5 max words, 150 samples - at ./dataset/gen-word-5-count.jsonl\n", - "Generated JSONL file with - 35 max words, 150 samples - at ./dataset/gen-word-35-count.jsonl\n", - "Generated JSONL file with - 40 max words, 150 samples - at ./dataset/gen-word-40-count.jsonl\n", - "Generated JSONL file with - 10 max words, 150 samples - at ./dataset/gen-word-10-count.jsonl\n", - "Generated JSONL file with - 25 max words, 150 samples - at ./dataset/gen-word-25-count.jsonl\n", - "Generated JSONL file with - 60 max words, 150 samples - at ./dataset/gen-word-60-count.jsonl\n", - "Generated JSONL file with - 55 max words, 150 samples - at ./dataset/gen-word-55-count.jsonl\n", - "Generated JSONL file with - 20 max words, 150 samples - at ./dataset/gen-word-20-count.jsonl\n", - "Generated JSONL file with - 65 max words, 150 samples - at ./dataset/gen-word-65-count.jsonl\n", - "Generated JSONL file with - 70 max words, 150 samples - at ./dataset/gen-word-70-count.jsonl\n", - "Generated JSONL file with - 85 max words, 150 samples - at ./dataset/gen-word-85-count.jsonl\n", - "Generated JSONL file with - 95 max words, 150 samples - at ./dataset/gen-word-95-count.jsonl\n", - "Generated JSONL file with - 15 max words, 150 samples - at ./dataset/gen-word-15-count.jsonl\n", - "Generated JSONL file with - 110 max words, 150 samples - at ./dataset/gen-word-110-count.jsonl\n", - "Generated JSONL file with - 50 max words, 150 samples - at ./dataset/gen-word-50-count.jsonl\n", - "Generated JSONL file with - 45 max words, 150 samples - at ./dataset/gen-word-45-count.jsonl\n", - "Generated JSONL file with - 115 max words, 150 samples - at ./dataset/gen-word-115-count.jsonl\n", - "Generated JSONL 
file with - 30 max words, 150 samples - at ./dataset/gen-word-30-count.jsonl\n", - "Generated JSONL file with - 120 max words, 150 samples - at ./dataset/gen-word-120-count.jsonl\n", - "Generated JSONL file with - 100 max words, 150 samples - at ./dataset/gen-word-100-count.jsonl\n", - "Generated JSONL file with - 80 max words, 150 samples - at ./dataset/gen-word-80-count.jsonl\n", - "Generated JSONL file with - 145 max words, 150 samples - at ./dataset/gen-word-145-count.jsonl\n", - "Generated JSONL file with - 90 max words, 150 samples - at ./dataset/gen-word-90-count.jsonl\n", - "Generated JSONL file with - 75 max words, 150 samples - at ./dataset/gen-word-75-count.jsonl\n", - "Generated JSONL file with - 105 max words, 150 samples - at ./dataset/gen-word-105-count.jsonl\n", - "Generated JSONL file with - 170 max words, 150 samples - at ./dataset/gen-word-170-count.jsonl\n", - "Generated JSONL file with - 125 max words, 150 samples - at ./dataset/gen-word-125-count.jsonl\n", - "Generated JSONL file with - 135 max words, 150 samples - at ./dataset/gen-word-135-count.jsonl\n", - "Generated JSONL file with - 220 max words, 150 samples - at ./dataset/gen-word-220-count.jsonl\n", - "Generated JSONL file with - 165 max words, 150 samples - at ./dataset/gen-word-165-count.jsonl\n", - "Generated JSONL file with - 250 max words, 150 samples - at ./dataset/gen-word-250-count.jsonl\n", - "Generated JSONL file with - 130 max words, 150 samples - at ./dataset/gen-word-130-count.jsonl\n", - "Generated JSONL file with - 140 max words, 150 samples - at ./dataset/gen-word-140-count.jsonl\n", - "Generated JSONL file with - 255 max words, 150 samples - at ./dataset/gen-word-255-count.jsonl\n", - "Generated JSONL file with - 175 max words, 150 samples - at ./dataset/gen-word-175-count.jsonl\n", - "Generated JSONL file with - 150 max words, 150 samples - at ./dataset/gen-word-150-count.jsonl\n", - "Generated JSONL file with - 285 max words, 150 samples - at ./dataset/gen-word-285-count.jsonl\n", - "Generated JSONL file with - 190 max words, 150 samples - at ./dataset/gen-word-190-count.jsonl\n", - "Generated JSONL file with - 160 max words, 150 samples - at ./dataset/gen-word-160-count.jsonl\n", - "Generated JSONL file with - 195 max words, 150 samples - at ./dataset/gen-word-195-count.jsonl\n", - "Generated JSONL file with - 185 max words, 150 samples - at ./dataset/gen-word-185-count.jsonl\n", - "Generated JSONL file with - 155 max words, 150 samples - at ./dataset/gen-word-155-count.jsonl\n", - "Generated JSONL file with - 205 max words, 150 samples - at ./dataset/gen-word-205-count.jsonl\n", - "Generated JSONL file with - 290 max words, 150 samples - at ./dataset/gen-word-290-count.jsonl\n", - "Generated JSONL file with - 315 max words, 150 samples - at ./dataset/gen-word-315-count.jsonl\n", - "Generated JSONL file with - 180 max words, 150 samples - at ./dataset/gen-word-180-count.jsonl\n", - "Generated JSONL file with - 325 max words, 150 samples - at ./dataset/gen-word-325-count.jsonl\n", - "Generated JSONL file with - 210 max words, 150 samples - at ./dataset/gen-word-210-count.jsonl\n", - "Generated JSONL file with - 345 max words, 150 samples - at ./dataset/gen-word-345-count.jsonl\n", - "Generated JSONL file with - 200 max words, 150 samples - at ./dataset/gen-word-200-count.jsonl\n", - "Generated JSONL file with - 350 max words, 150 samples - at ./dataset/gen-word-350-count.jsonl\n", - "Generated JSONL file with - 225 max words, 150 samples - at ./dataset/gen-word-225-count.jsonl\n", - 
"Generated JSONL file with - 230 max words, 150 samples - at ./dataset/gen-word-230-count.jsonl\n", - "Generated JSONL file with - 335 max words, 150 samples - at ./dataset/gen-word-335-count.jsonl\n", - "Generated JSONL file with - 330 max words, 150 samples - at ./dataset/gen-word-330-count.jsonl\n", - "Generated JSONL file with - 215 max words, 150 samples - at ./dataset/gen-word-215-count.jsonl\n", - "Generated JSONL file with - 245 max words, 150 samples - at ./dataset/gen-word-245-count.jsonl\n", - "Generated JSONL file with - 380 max words, 150 samples - at ./dataset/gen-word-380-count.jsonl\n", - "Generated JSONL file with - 390 max words, 150 samples - at ./dataset/gen-word-390-count.jsonl\n", - "Generated JSONL file with - 265 max words, 150 samples - at ./dataset/gen-word-265-count.jsonl\n", - "Generated JSONL file with - 235 max words, 150 samples - at ./dataset/gen-word-235-count.jsonl\n", - "Generated JSONL file with - 310 max words, 150 samples - at ./dataset/gen-word-310-count.jsonl\n", - "Generated JSONL file with - 300 max words, 150 samples - at ./dataset/gen-word-300-count.jsonl\n", - "Generated JSONL file with - 270 max words, 150 samples - at ./dataset/gen-word-270-count.jsonl\n", - "Generated JSONL file with - 355 max words, 150 samples - at ./dataset/gen-word-355-count.jsonl\n", - "Generated JSONL file with - 375 max words, 150 samples - at ./dataset/gen-word-375-count.jsonl\n", - "Generated JSONL file with - 240 max words, 150 samples - at ./dataset/gen-word-240-count.jsonl\n", - "Generated JSONL file with - 275 max words, 150 samples - at ./dataset/gen-word-275-count.jsonl\n", - "Generated JSONL file with - 425 max words, 150 samples - at ./dataset/gen-word-425-count.jsonl\n", - "Generated JSONL file with - 260 max words, 150 samples - at ./dataset/gen-word-260-count.jsonl\n", - "Generated JSONL file with - 280 max words, 150 samples - at ./dataset/gen-word-280-count.jsonl\n", - "Generated JSONL file with - 305 max words, 150 samples - at ./dataset/gen-word-305-count.jsonl\n", - "Generated JSONL file with - 405 max words, 150 samples - at ./dataset/gen-word-405-count.jsonl\n", - "Generated JSONL file with - 320 max words, 150 samples - at ./dataset/gen-word-320-count.jsonl\n", - "Generated a single JSONL file with 1731 samples (100 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", - "Generated a single JSONL file with 3551 samples (100 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", - "Generated a single JSONL file with 3767 samples (100 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", - "Generated JSONL file with - 430 max words, 150 samples - at ./dataset/gen-word-430-count.jsonl\n", - "Generated JSONL file with - 340 max words, 150 samples - at ./dataset/gen-word-340-count.jsonl\n", - "Generated JSONL file with - 445 max words, 150 samples - at ./dataset/gen-word-445-count.jsonl\n", - "Generated a single JSONL file with 1538 samples (100 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", - "Generated a single JSONL file with 1980 samples (100 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", - "Generated JSONL file with - 495 max words, 150 samples - at ./dataset/gen-word-495-count.jsonl\n", - "Generated JSONL file with - 720 max words, 150 samples - at ./dataset/gen-word-720-count.jsonl\n", - "Generated JSONL file with - 665 max words, 150 samples - at ./dataset/gen-word-665-count.jsonl\n", - "Generated JSONL file with - 415 
max words, 150 samples - at ./dataset/gen-word-415-count.jsonl\n", - "Generated JSONL file with - 410 max words, 150 samples - at ./dataset/gen-word-410-count.jsonl\n", - "Generated JSONL file with - 710 max words, 150 samples - at ./dataset/gen-word-710-count.jsonl\n", - "Generated JSONL file with - 810 max words, 150 samples - at ./dataset/gen-word-810-count.jsonl\n", - "Generated a single JSONL file with 4834 samples (100 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", - "Generated a single JSONL file with 4068 samples (100 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonl\n", - "Generated JSONL file with - 530 max words, 150 samples - at ./dataset/gen-word-530-count.jsonl\n", - "Generated a single JSONL file with 729 samples (100 token repeat) - 305 max words - at ./dataset/shuffle-word-305-count.jsonl\n", - "Generated JSONL file with - 470 max words, 150 samples - at ./dataset/gen-word-470-count.jsonl\n", - "Generated JSONL file with - 440 max words, 150 samples - at ./dataset/gen-word-440-count.jsonl\n", - "Generated JSONL file with - 365 max words, 150 samples - at ./dataset/gen-word-365-count.jsonl\n", - "Generated a single JSONL file with 920 samples (100 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", - "Generated JSONL file with - 730 max words, 150 samples - at ./dataset/gen-word-730-count.jsonl\n", - "Generated JSONL file with - 715 max words, 150 samples - at ./dataset/gen-word-715-count.jsonl\n", - "Generated a single JSONL file with 715 samples (100 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", - "Generated JSONL file with - 500 max words, 150 samples - at ./dataset/gen-word-500-count.jsonl\n", - "Generated a single JSONL file with 702 samples (100 token repeat) - 335 max words - at ./dataset/shuffle-word-335-count.jsonl\n", - "Generated a single JSONL file with 923 samples (100 token repeat) - 265 max words - at ./dataset/shuffle-word-265-count.jsonl\n", - "Generated a single JSONL file with 700 samples (100 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", - "Generated JSONL file with - 750 max words, 150 samples - at ./dataset/gen-word-750-count.jsonl\n", - "Generated JSONL file with - 435 max words, 150 samples - at ./dataset/gen-word-435-count.jsonl\n", - "Generated JSONL file with - 700 max words, 150 samples - at ./dataset/gen-word-700-count.jsonl\n", - "Generated JSONL file with - 510 max words, 150 samples - at ./dataset/gen-word-510-count.jsonl\n", - "Generated JSONL file with - 925 max words, 150 samples - at ./dataset/gen-word-925-count.jsonl\n", - "Generated JSONL file with - 370 max words, 150 samples - at ./dataset/gen-word-370-count.jsonl\n", - "Generated a single JSONL file with 705 samples (100 token repeat) - 325 max words - at ./dataset/shuffle-word-325-count.jsonl\n", - "Generated JSONL file with - 620 max words, 150 samples - at ./dataset/gen-word-620-count.jsonl\n", - "Generated JSONL file with - 675 max words, 150 samples - at ./dataset/gen-word-675-count.jsonl\n", - "Generated JSONL file with - 480 max words, 150 samples - at ./dataset/gen-word-480-count.jsonl\n", - "Generated JSONL file with - 395 max words, 150 samples - at ./dataset/gen-word-395-count.jsonl\n", - "Generated JSONL file with - 760 max words, 150 samples - at ./dataset/gen-word-760-count.jsonl\n", - "Generated JSONL file with - 295 max words, 150 samples - at ./dataset/gen-word-295-count.jsonl\n", - "Generated a single JSONL file with 400 
samples (100 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", - "Generated a single JSONL file with 6539 samples (100 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", - "Generated JSONL file with - 490 max words, 150 samples - at ./dataset/gen-word-490-count.jsonl\n", - "Generated JSONL file with - 850 max words, 150 samples - at ./dataset/gen-word-850-count.jsonl\n", - "Generated JSONL file with - 955 max words, 150 samples - at ./dataset/gen-word-955-count.jsonl\n", - "Generated a single JSONL file with 702 samples (100 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", - "Generated a single JSONL file with 2944 samples (100 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", - "Generated JSONL file with - 965 max words, 150 samples - at ./dataset/gen-word-965-count.jsonl\n", - "Generated JSONL file with - 635 max words, 150 samples - at ./dataset/gen-word-635-count.jsonl\n", - "Generated a single JSONL file with 919 samples (100 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", - "Generated JSONL file with - 805 max words, 150 samples - at ./dataset/gen-word-805-count.jsonl\n", - "Generated JSONL file with - 420 max words, 150 samples - at ./dataset/gen-word-420-count.jsonl\n", - "Generated a single JSONL file with 4381 samples (100 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", - "Generated JSONL file with - 775 max words, 150 samples - at ./dataset/gen-word-775-count.jsonl\n", - "Generated a single JSONL file with 3117 samples (100 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", - "Generated a single JSONL file with 589 samples (100 token repeat) - 475 max words - at ./dataset/shuffle-word-475-count.jsonl\n", - "Generated JSONL file with - 705 max words, 150 samples - at ./dataset/gen-word-705-count.jsonl\n", - "Generated JSONL file with - 825 max words, 150 samples - at ./dataset/gen-word-825-count.jsonl\n", - "Generated a single JSONL file with 5256 samples (100 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", - "Generated JSONL file with - 605 max words, 150 samples - at ./dataset/gen-word-605-count.jsonl\n", - "Generated a single JSONL file with 400 samples (100 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", - "Generated JSONL file with - 610 max words, 150 samples - at ./dataset/gen-word-610-count.jsonl\n", - "Generated JSONL file with - 860 max words, 150 samples - at ./dataset/gen-word-860-count.jsonl\n", - "Generated a single JSONL file with 702 samples (100 token repeat) - 385 max words - at ./dataset/shuffle-word-385-count.jsonl\n", - "Generated a single JSONL file with 702 samples (100 token repeat) - 390 max words - at ./dataset/shuffle-word-390-count.jsonl\n", - "Generated a single JSONL file with 703 samples (100 token repeat) - 375 max words - at ./dataset/shuffle-word-375-count.jsonl\n", - "Generated a single JSONL file with 498 samples (100 token repeat) - 525 max words - at ./dataset/shuffle-word-525-count.jsonl\n", - "Generated JSONL file with - 560 max words, 150 samples - at ./dataset/gen-word-560-count.jsonl\n", - "Generated a single JSONL file with 594 samples (100 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", - "Generated a single JSONL file with 927 samples (100 token repeat) - 255 max words - at ./dataset/shuffle-word-255-count.jsonl\n", - "Generated a single JSONL file with 399 
samples (100 token repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", - "Generated a single JSONL file with 582 samples (100 token repeat) - 445 max words - at ./dataset/shuffle-word-445-count.jsonl\n", - "Generated JSONL file with - 735 max words, 150 samples - at ./dataset/gen-word-735-count.jsonl\n", - "Generated JSONL file with - 475 max words, 150 samples - at ./dataset/gen-word-475-count.jsonl\n", - "Generated a single JSONL file with 322 samples (100 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", - "Generated JSONL file with - 820 max words, 150 samples - at ./dataset/gen-word-820-count.jsonl\n", - "Generated JSONL file with - 695 max words, 150 samples - at ./dataset/gen-word-695-count.jsonl\n", - "Generated JSONL file with - 540 max words, 150 samples - at ./dataset/gen-word-540-count.jsonl\n", - "Generated JSONL file with - 385 max words, 150 samples - at ./dataset/gen-word-385-count.jsonl\n", - "Generated JSONL file with - 670 max words, 150 samples - at ./dataset/gen-word-670-count.jsonl\n", - "Generated a single JSONL file with 587 samples (100 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonl\n", - "Generated JSONL file with - 625 max words, 150 samples - at ./dataset/gen-word-625-count.jsonl\n", - "Generated JSONL file with - 520 max words, 150 samples - at ./dataset/gen-word-520-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 905 max words - at ./dataset/shuffle-word-905-count.jsonl\n", - "Generated JSONL file with - 800 max words, 150 samples - at ./dataset/gen-word-800-count.jsonl\n", - "Generated JSONL file with - 755 max words, 150 samples - at ./dataset/gen-word-755-count.jsonl\n", - "Generated JSONL file with - 830 max words, 150 samples - at ./dataset/gen-word-830-count.jsonl\n", - "Generated JSONL file with - 615 max words, 150 samples - at ./dataset/gen-word-615-count.jsonl\n", - "Generated JSONL file with - 575 max words, 150 samples - at ./dataset/gen-word-575-count.jsonl\n", - "Generated a single JSONL file with 397 samples (100 token repeat) - 755 max words - at ./dataset/shuffle-word-755-count.jsonl\n", - "Generated JSONL file with - 815 max words, 150 samples - at ./dataset/gen-word-815-count.jsonl\n", - "Generated JSONL file with - 505 max words, 150 samples - at ./dataset/gen-word-505-count.jsonl\n", - "Generated JSONL file with - 555 max words, 150 samples - at ./dataset/gen-word-555-count.jsonl\n", - "Generated a single JSONL file with 1381 samples (100 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", - "Generated JSONL file with - 680 max words, 150 samples - at ./dataset/gen-word-680-count.jsonl\n", - "Generated a single JSONL file with 698 samples (100 token repeat) - 395 max words - at ./dataset/shuffle-word-395-count.jsonl\n", - "Generated JSONL file with - 565 max words, 150 samples - at ./dataset/gen-word-565-count.jsonl\n", - "Generated a single JSONL file with 398 samples (100 token repeat) - 715 max words - at ./dataset/shuffle-word-715-count.jsonl\n", - "Generated JSONL file with - 725 max words, 150 samples - at ./dataset/gen-word-725-count.jsonl\n", - "Generated JSONL file with - 515 max words, 150 samples - at ./dataset/gen-word-515-count.jsonl\n", - "Generated a single JSONL file with 399 samples (100 token repeat) - 800 max words - at ./dataset/shuffle-word-800-count.jsonl\n", - "Generated JSONL file with - 795 max words, 150 samples - at ./dataset/gen-word-795-count.jsonl\n", - "Generated JSONL 
file with - 690 max words, 150 samples - at ./dataset/gen-word-690-count.jsonl\n", - "Generated a single JSONL file with 316 samples (100 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", - "Generated a single JSONL file with 312 samples (100 token repeat) - 875 max words - at ./dataset/shuffle-word-875-count.jsonl\n", - "Generated JSONL file with - 890 max words, 150 samples - at ./dataset/gen-word-890-count.jsonl\n", - "Generated JSONL file with - 855 max words, 150 samples - at ./dataset/gen-word-855-count.jsonl\n", - "Generated JSONL file with - 455 max words, 150 samples - at ./dataset/gen-word-455-count.jsonl\n", - "Generated a single JSONL file with 596 samples (100 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", - "Generated JSONL file with - 570 max words, 150 samples - at ./dataset/gen-word-570-count.jsonl\n", - "Generated a single JSONL file with 405 samples (100 token repeat) - 635 max words - at ./dataset/shuffle-word-635-count.jsonl\n", - "Generated JSONL file with - 895 max words, 150 samples - at ./dataset/gen-word-895-count.jsonl\n", - "Generated a single JSONL file with 402 samples (100 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", - "Generated a single JSONL file with 400 samples (100 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", - "Generated JSONL file with - 780 max words, 150 samples - at ./dataset/gen-word-780-count.jsonl\n", - "Generated a single JSONL file with 704 samples (100 token repeat) - 345 max words - at ./dataset/shuffle-word-345-count.jsonl\n", - "Generated JSONL file with - 630 max words, 150 samples - at ./dataset/gen-word-630-count.jsonl\n", - "Generated a single JSONL file with 399 samples (100 token repeat) - 795 max words - at ./dataset/shuffle-word-795-count.jsonl\n", - "Generated JSONL file with - 450 max words, 150 samples - at ./dataset/gen-word-450-count.jsonl\n", - "Generated a single JSONL file with 1623 samples (100 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", - "Generated JSONL file with - 990 max words, 150 samples - at ./dataset/gen-word-990-count.jsonl\n", - "Generated JSONL file with - 935 max words, 150 samples - at ./dataset/gen-word-935-count.jsonl\n", - "Generated a single JSONL file with 322 samples (100 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", - "Generated JSONL file with - 580 max words, 150 samples - at ./dataset/gen-word-580-count.jsonl\n", - "Generated a single JSONL file with 585 samples (100 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", - "Generated a single JSONL file with 497 samples (100 token repeat) - 600 max words - at ./dataset/shuffle-word-600-count.jsonl\n", - "Generated a single JSONL file with 403 samples (100 token repeat) - 615 max words - at ./dataset/shuffle-word-615-count.jsonl\n", - "Generated a single JSONL file with 397 samples (100 token repeat) - 725 max words - at ./dataset/shuffle-word-725-count.jsonl\n", - "Generated JSONL file with - 840 max words, 150 samples - at ./dataset/gen-word-840-count.jsonl\n", - "Generated a single JSONL file with 398 samples (100 token repeat) - 705 max words - at ./dataset/shuffle-word-705-count.jsonl\n", - "Generated a single JSONL file with 314 samples (100 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", - "Generated a single JSONL file with 405 samples (100 token repeat) - 605 max words - at 
./dataset/shuffle-word-605-count.jsonl\n", - "Generated a single JSONL file with 933 samples (100 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", - "Generated a single JSONL file with 402 samples (100 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", - "Generated a single JSONL file with 314 samples (100 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", - "Generated JSONL file with - 885 max words, 150 samples - at ./dataset/gen-word-885-count.jsonl\n", - "Generated a single JSONL file with 3310 samples (100 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", - "Generated a single JSONL file with 317 samples (100 token repeat) - 805 max words - at ./dataset/shuffle-word-805-count.jsonl\n", - "Generated JSONL file with - 845 max words, 150 samples - at ./dataset/gen-word-845-count.jsonl\n", - "Generated JSONL file with - 880 max words, 150 samples - at ./dataset/gen-word-880-count.jsonl\n", - "Generated JSONL file with - 465 max words, 150 samples - at ./dataset/gen-word-465-count.jsonl\n", - "Generated JSONL file with - 550 max words, 150 samples - at ./dataset/gen-word-550-count.jsonl\n", - "Generated JSONL file with - 960 max words, 150 samples - at ./dataset/gen-word-960-count.jsonl\n", - "Generated a single JSONL file with 581 samples (100 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", - "Generated JSONL file with - 865 max words, 150 samples - at ./dataset/gen-word-865-count.jsonl\n", - "Generated JSONL file with - 745 max words, 150 samples - at ./dataset/gen-word-745-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 955 max words - at ./dataset/shuffle-word-955-count.jsonl\n", - "Generated a single JSONL file with 404 samples (100 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", - "Generated a single JSONL file with 498 samples (100 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonl\n", - "Generated JSONL file with - 770 max words, 150 samples - at ./dataset/gen-word-770-count.jsonl\n", - "Generated JSONL file with - 590 max words, 150 samples - at ./dataset/gen-word-590-count.jsonl\n", - "Generated JSONL file with - 915 max words, 150 samples - at ./dataset/gen-word-915-count.jsonl\n", - "Generated a single JSONL file with 916 samples (100 token repeat) - 295 max words - at ./dataset/shuffle-word-295-count.jsonl\n", - "Generated JSONL file with - 650 max words, 150 samples - at ./dataset/gen-word-650-count.jsonl\n", - "Generated JSONL file with - 790 max words, 150 samples - at ./dataset/gen-word-790-count.jsonl\n", - "Generated a single JSONL file with 405 samples (100 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", - "Generated a single JSONL file with 317 samples (100 token repeat) - 845 max words - at ./dataset/shuffle-word-845-count.jsonl\n", - "Generated JSONL file with - 870 max words, 150 samples - at ./dataset/gen-word-870-count.jsonl\n", - "Generated a single JSONL file with 400 samples (100 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", - "Generated JSONL file with - 835 max words, 150 samples - at ./dataset/gen-word-835-count.jsonl\n", - "Generated a single JSONL file with 702 samples (100 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", - "Generated JSONL file with - 905 max words, 150 samples - at ./dataset/gen-word-905-count.jsonl\n", - "Generated JSONL 
file with - 585 max words, 150 samples - at ./dataset/gen-word-585-count.jsonl\n", - "Generated a single JSONL file with 398 samples (100 token repeat) - 765 max words - at ./dataset/shuffle-word-765-count.jsonl\n", - "Generated JSONL file with - 1000 max words, 150 samples - at ./dataset/gen-word-1000-count.jsonl\n", - "Generated a single JSONL file with 322 samples (100 token repeat) - 815 max words - at ./dataset/shuffle-word-815-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", - "Generated JSONL file with - 685 max words, 150 samples - at ./dataset/gen-word-685-count.jsonl\n", - "Generated JSONL file with - 535 max words, 150 samples - at ./dataset/gen-word-535-count.jsonl\n", - "Generated JSONL file with - 740 max words, 150 samples - at ./dataset/gen-word-740-count.jsonl\n", - "Generated a single JSONL file with 400 samples (100 token repeat) - 685 max words - at ./dataset/shuffle-word-685-count.jsonl\n", - "Generated JSONL file with - 900 max words, 150 samples - at ./dataset/gen-word-900-count.jsonl\n", - "Generated a single JSONL file with 1360 samples (100 token repeat) - 195 max words - at ./dataset/shuffle-word-195-count.jsonl\n", - "Generated a single JSONL file with 401 samples (100 token repeat) - 665 max words - at ./dataset/shuffle-word-665-count.jsonl\n", - "Generated a single JSONL file with 311 samples (100 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", - "Generated a single JSONL file with 398 samples (100 token repeat) - 745 max words - at ./dataset/shuffle-word-745-count.jsonl\n", - "Generated JSONL file with - 940 max words, 150 samples - at ./dataset/gen-word-940-count.jsonl\n", - "Generated a single JSONL file with 396 samples (100 token repeat) - 775 max words - at ./dataset/shuffle-word-775-count.jsonl\n", - "Generated a single JSONL file with 399 samples (100 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", - "Generated a single JSONL file with 1469 samples (100 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", - "Generated JSONL file with - 525 max words, 150 samples - at ./dataset/gen-word-525-count.jsonl\n", - "Generated a single JSONL file with 316 samples (100 token repeat) - 855 max words - at ./dataset/shuffle-word-855-count.jsonl\n", - "Generated a single JSONL file with 970 samples (100 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", - "Generated a single JSONL file with 499 samples (100 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", - "Generated JSONL file with - 600 max words, 150 samples - at ./dataset/gen-word-600-count.jsonl\n", - "Generated a single JSONL file with 944 samples (100 token repeat) - 245 max words - at ./dataset/shuffle-word-245-count.jsonl\n", - "Generated a single JSONL file with 498 samples (100 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", - "Generated a single JSONL file with 401 samples (100 token repeat) - 645 max words - at ./dataset/shuffle-word-645-count.jsonl\n", - "Generated a single JSONL file with 403 samples (100 token repeat) - 655 max words - at ./dataset/shuffle-word-655-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 915 max words - at ./dataset/shuffle-word-915-count.jsonl\n", - "Generated a single JSONL file with 321 samples (100 token repeat) - 865 max words - at 
./dataset/shuffle-word-865-count.jsonl\n", - "Generated JSONL file with - 1065 max words, 150 samples - at ./dataset/gen-word-1065-count.jsonl\n", - "Generated a single JSONL file with 321 samples (100 token repeat) - 825 max words - at ./dataset/shuffle-word-825-count.jsonl\n", - "Generated a single JSONL file with 397 samples (100 token repeat) - 735 max words - at ./dataset/shuffle-word-735-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 925 max words - at ./dataset/shuffle-word-925-count.jsonl\n", - "Generated a single JSONL file with 17777 samples (100 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n", - "Generated JSONL file with - 945 max words, 150 samples - at ./dataset/gen-word-945-count.jsonl\n", - "Generated JSONL file with - 930 max words, 150 samples - at ./dataset/gen-word-930-count.jsonl\n", - "Generated a single JSONL file with 399 samples (100 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonl\n", - "Generated JSONL file with - 980 max words, 150 samples - at ./dataset/gen-word-980-count.jsonl\n", - "Generated JSONL file with - 360 max words, 150 samples - at ./dataset/gen-word-360-count.jsonl\n", - "Generated JSONL file with - 975 max words, 150 samples - at ./dataset/gen-word-975-count.jsonl\n", - "Generated a single JSONL file with 499 samples (100 token repeat) - 585 max words - at ./dataset/shuffle-word-585-count.jsonl\n", - "Generated a single JSONL file with 318 samples (100 token repeat) - 835 max words - at ./dataset/shuffle-word-835-count.jsonl\n", - "Generated a single JSONL file with 403 samples (100 token repeat) - 675 max words - at ./dataset/shuffle-word-675-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 975 max words - at ./dataset/shuffle-word-975-count.jsonl\n", - "Generated a single JSONL file with 1002 samples (100 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", - "Generated a single JSONL file with 398 samples (100 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", - "Generated a single JSONL file with 984 samples (100 token repeat) - 235 max words - at ./dataset/shuffle-word-235-count.jsonl\n", - "Generated JSONL file with - 920 max words, 150 samples - at ./dataset/gen-word-920-count.jsonl\n", - "Generated JSONL file with - 1080 max words, 150 samples - at ./dataset/gen-word-1080-count.jsonl\n", - "Generated a single JSONL file with 496 samples (100 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", - "Generated JSONL file with - 400 max words, 150 samples - at ./dataset/gen-word-400-count.jsonl\n", - "Generated a single JSONL file with 499 samples (100 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", - "Generated a single JSONL file with 402 samples (100 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", - "Generated a single JSONL file with 499 samples (100 token repeat) - 535 max words - at ./dataset/shuffle-word-535-count.jsonl\n", - "Generated a single JSONL file with 500 samples (100 token repeat) - 595 max words - at ./dataset/shuffle-word-595-count.jsonl\n", - "Generated JSONL file with - 1025 max words, 150 samples - at ./dataset/gen-word-1025-count.jsonl\n", - "Generated JSONL file with - 1005 max words, 150 samples - at ./dataset/gen-word-1005-count.jsonl\n", - "Generated JSONL file with - 460 max words, 150 samples - at ./dataset/gen-word-460-count.jsonl\n", - "Generated 
JSONL file with - 785 max words, 150 samples - at ./dataset/gen-word-785-count.jsonl\n", - "Generated a single JSONL file with 321 samples (100 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 985 max words - at ./dataset/shuffle-word-985-count.jsonl\n", - "Generated JSONL file with - 970 max words, 150 samples - at ./dataset/gen-word-970-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 945 max words - at ./dataset/shuffle-word-945-count.jsonl\n", - "Generated JSONL file with - 1185 max words, 150 samples - at ./dataset/gen-word-1185-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", - "Generated JSONL file with - 875 max words, 150 samples - at ./dataset/gen-word-875-count.jsonl\n", - "Generated JSONL file with - 660 max words, 150 samples - at ./dataset/gen-word-660-count.jsonl\n", - "Generated a single JSONL file with 996 samples (100 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", - "Generated JSONL file with - 1515 max words, 150 samples - at ./dataset/gen-word-1515-count.jsonl\n", - "Generated a single JSONL file with 318 samples (100 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", - "Generated JSONL file with - 485 max words, 150 samples - at ./dataset/gen-word-485-count.jsonl\n", - "Generated a single JSONL file with 310 samples (100 token repeat) - 885 max words - at ./dataset/shuffle-word-885-count.jsonl\n", - "Generated a single JSONL file with 403 samples (100 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", - "Generated JSONL file with - 1015 max words, 150 samples - at ./dataset/gen-word-1015-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 965 max words - at ./dataset/shuffle-word-965-count.jsonl\n", - "Generated JSONL file with - 1665 max words, 150 samples - at ./dataset/gen-word-1665-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1935 max words - at ./dataset/shuffle-word-1935-count.jsonl\n", - "Generated JSONL file with - 2055 max words, 125 samples - at ./dataset/gen-word-2055-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1895 max words - at ./dataset/shuffle-word-1895-count.jsonl\n", - "Generated a single JSONL file with 318 samples (100 token repeat) - 895 max words - at ./dataset/shuffle-word-895-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n", - "Generated a single JSONL file with 701 samples (100 token repeat) - 365 max words - at ./dataset/shuffle-word-365-count.jsonl\n", - "Generated JSONL file with - 1935 max words, 150 samples - at ./dataset/gen-word-1935-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2095 max words - at ./dataset/shuffle-word-2095-count.jsonl\n", - "Generated a single JSONL 
file with 500 samples (100 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", - "Generated a single JSONL file with 251 samples (100 token repeat) - 1215 max words - at ./dataset/shuffle-word-1215-count.jsonl\n", - "Generated JSONL file with - 1905 max words, 150 samples - at ./dataset/gen-word-1905-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n", - "Generated JSONL file with - 1895 max words, 150 samples - at ./dataset/gen-word-1895-count.jsonl\n", - "Generated a single JSONL file with 596 samples (100 token repeat) - 415 max words - at ./dataset/shuffle-word-415-count.jsonl\n", - "Generated JSONL file with - 1785 max words, 150 samples - at ./dataset/gen-word-1785-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n", - "Generated JSONL file with - 2080 max words, 125 samples - at ./dataset/gen-word-2080-count.jsonl\n", - "Generated JSONL file with - 2000 max words, 150 samples - at ./dataset/gen-word-2000-count.jsonl\n", - "Generated JSONL file with - 995 max words, 150 samples - at ./dataset/gen-word-995-count.jsonl\n", - "Generated a single JSONL file with 500 samples (100 token repeat) - 515 max words - at ./dataset/shuffle-word-515-count.jsonl\n", - "Generated a single JSONL file with 201 samples (100 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", - "Generated JSONL file with - 1835 max words, 150 samples - at ./dataset/gen-word-1835-count.jsonl\n", - "Generated JSONL file with - 1630 max words, 150 samples - at ./dataset/gen-word-1630-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1645 max words - at ./dataset/shuffle-word-1645-count.jsonl\n", - "Generated JSONL file with - 2100 max words, 125 samples - at ./dataset/gen-word-2100-count.jsonl\n", - "Generated a single JSONL file with 204 samples (100 token repeat) - 1345 max words - at ./dataset/shuffle-word-1345-count.jsonl\n", - "Generated JSONL file with - 1330 max words, 150 samples - at ./dataset/gen-word-1330-count.jsonl\n", - "Generated a single JSONL file with 918 samples (100 token repeat) - 285 max words - at ./dataset/shuffle-word-285-count.jsonl\n", - "Generated JSONL file with - 2105 max words, 125 samples - at ./dataset/gen-word-2105-count.jsonl\n", + "Generated JSONL file with - 5 max words, 500 samples - at ./dataset/gen-word-5-count.jsonl\n", + "Generated JSONL file with - 20 max words, 500 samples - at ./dataset/gen-word-20-count.jsonl\n", + "Generated JSONL file with - 35 max words, 500 samples - at ./dataset/gen-word-35-count.jsonl\n", + "Generated JSONL file with - 190 max words, 50 samples - at ./dataset/gen-word-190-count.jsonl\n", + "Generated JSONL file with - 60 max words, 500 samples - at ./dataset/gen-word-60-count.jsonl\n", + "Generated JSONL file with - 205 max words, 50 samples - at ./dataset/gen-word-205-count.jsonl\n", + "Generated JSONL file with - 10 max words, 500 samples - at ./dataset/gen-word-10-count.jsonl\n", + "Generated JSONL file with - 200 max words, 50 samples - at ./dataset/gen-word-200-count.jsonl\n", + "Generated JSONL file with - 230 max words, 50 samples - at ./dataset/gen-word-230-count.jsonl\n", + "Generated JSONL file with - 15 max words, 500 samples - at 
./dataset/gen-word-15-count.jsonl\n", + "Generated JSONL file with - 255 max words, 50 samples - at ./dataset/gen-word-255-count.jsonl\n", + "Generated JSONL file with - 110 max words, 50 samples - at ./dataset/gen-word-110-count.jsonl\n", + "Generated JSONL file with - 235 max words, 50 samples - at ./dataset/gen-word-235-count.jsonl\n", + "Generated JSONL file with - 95 max words, 500 samples - at ./dataset/gen-word-95-count.jsonl\n", + "Generated JSONL file with - 90 max words, 500 samples - at ./dataset/gen-word-90-count.jsonl\n", + "Generated JSONL file with - 135 max words, 50 samples - at ./dataset/gen-word-135-count.jsonl\n", + "Generated JSONL file with - 120 max words, 50 samples - at ./dataset/gen-word-120-count.jsonl\n", + "Generated JSONL file with - 260 max words, 50 samples - at ./dataset/gen-word-260-count.jsonl\n", + "Generated JSONL file with - 80 max words, 500 samples - at ./dataset/gen-word-80-count.jsonl\n", + "Generated JSONL file with - 115 max words, 50 samples - at ./dataset/gen-word-115-count.jsonl\n", + "Generated JSONL file with - 25 max words, 500 samples - at ./dataset/gen-word-25-count.jsonl\n", + "Generated JSONL file with - 105 max words, 50 samples - at ./dataset/gen-word-105-count.jsonl\n", + "Generated JSONL file with - 125 max words, 50 samples - at ./dataset/gen-word-125-count.jsonl\n", + "Generated JSONL file with - 140 max words, 50 samples - at ./dataset/gen-word-140-count.jsonl\n", + "Generated JSONL file with - 195 max words, 50 samples - at ./dataset/gen-word-195-count.jsonl\n", + "Generated JSONL file with - 30 max words, 500 samples - at ./dataset/gen-word-30-count.jsonl\n", + "Generated JSONL file with - 130 max words, 50 samples - at ./dataset/gen-word-130-count.jsonl\n", + "Generated JSONL file with - 40 max words, 500 samples - at ./dataset/gen-word-40-count.jsonl\n", + "Generated JSONL file with - 165 max words, 50 samples - at ./dataset/gen-word-165-count.jsonl\n", + "Generated JSONL file with - 145 max words, 50 samples - at ./dataset/gen-word-145-count.jsonl\n", + "Generated JSONL file with - 160 max words, 50 samples - at ./dataset/gen-word-160-count.jsonl\n", + "Generated JSONL file with - 150 max words, 50 samples - at ./dataset/gen-word-150-count.jsonl\n", + "Generated JSONL file with - 50 max words, 500 samples - at ./dataset/gen-word-50-count.jsonl\n", + "Generated JSONL file with - 185 max words, 50 samples - at ./dataset/gen-word-185-count.jsonl\n", + "Generated JSONL file with - 155 max words, 50 samples - at ./dataset/gen-word-155-count.jsonl\n", + "Generated JSONL file with - 170 max words, 50 samples - at ./dataset/gen-word-170-count.jsonl\n", + "Generated JSONL file with - 175 max words, 50 samples - at ./dataset/gen-word-175-count.jsonl\n", + "Generated JSONL file with - 180 max words, 50 samples - at ./dataset/gen-word-180-count.jsonl\n", + "Generated JSONL file with - 70 max words, 500 samples - at ./dataset/gen-word-70-count.jsonl\n", + "Generated JSONL file with - 370 max words, 50 samples - at ./dataset/gen-word-370-count.jsonl\n", + "Generated JSONL file with - 210 max words, 50 samples - at ./dataset/gen-word-210-count.jsonl\n", + "Generated JSONL file with - 360 max words, 50 samples - at ./dataset/gen-word-360-count.jsonl\n", + "Generated JSONL file with - 45 max words, 500 samples - at ./dataset/gen-word-45-count.jsonl\n", + "Generated JSONL file with - 250 max words, 50 samples - at ./dataset/gen-word-250-count.jsonl\n", + "Generated JSONL file with - 225 max words, 50 samples - at 
./dataset/gen-word-225-count.jsonl\n", + "Generated JSONL file with - 220 max words, 50 samples - at ./dataset/gen-word-220-count.jsonl\n", + "Generated JSONL file with - 55 max words, 500 samples - at ./dataset/gen-word-55-count.jsonl\n", + "Generated JSONL file with - 215 max words, 50 samples - at ./dataset/gen-word-215-count.jsonl\n", + "Generated JSONL file with - 275 max words, 50 samples - at ./dataset/gen-word-275-count.jsonl\n", + "Generated JSONL file with - 65 max words, 500 samples - at ./dataset/gen-word-65-count.jsonl\n", + "Generated JSONL file with - 240 max words, 50 samples - at ./dataset/gen-word-240-count.jsonl\n", + "Generated JSONL file with - 245 max words, 50 samples - at ./dataset/gen-word-245-count.jsonl\n", + "Generated JSONL file with - 270 max words, 50 samples - at ./dataset/gen-word-270-count.jsonl\n", + "Generated JSONL file with - 265 max words, 50 samples - at ./dataset/gen-word-265-count.jsonl\n", + "Generated JSONL file with - 75 max words, 500 samples - at ./dataset/gen-word-75-count.jsonl\n", + "Generated JSONL file with - 300 max words, 50 samples - at ./dataset/gen-word-300-count.jsonl\n", + "Generated JSONL file with - 280 max words, 50 samples - at ./dataset/gen-word-280-count.jsonl\n", + "Generated a single JSONL file with 950 samples (50 token repeat) - 115 max words - at ./dataset/shuffle-word-115-count.jsonl\n", + "Generated JSONL file with - 290 max words, 50 samples - at ./dataset/gen-word-290-count.jsonl\n", + "Generated JSONL file with - 310 max words, 50 samples - at ./dataset/gen-word-310-count.jsonl\n", + "Generated JSONL file with - 305 max words, 50 samples - at ./dataset/gen-word-305-count.jsonl\n", + "Generated JSONL file with - 315 max words, 50 samples - at ./dataset/gen-word-315-count.jsonl\n", + "Generated a single JSONL file with 703 samples (50 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", + "Generated JSONL file with - 85 max words, 500 samples - at ./dataset/gen-word-85-count.jsonl\n", + "Generated JSONL file with - 330 max words, 50 samples - at ./dataset/gen-word-330-count.jsonl\n", + "Generated JSONL file with - 100 max words, 500 samples - at ./dataset/gen-word-100-count.jsonl\n", + "Generated JSONL file with - 335 max words, 50 samples - at ./dataset/gen-word-335-count.jsonl\n", + "Generated JSONL file with - 340 max words, 50 samples - at ./dataset/gen-word-340-count.jsonl\n", + "Generated JSONL file with - 425 max words, 50 samples - at ./dataset/gen-word-425-count.jsonl\n", + "Generated a single JSONL file with 471 samples (50 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", + "Generated a single JSONL file with 459 samples (50 token repeat) - 265 max words - at ./dataset/shuffle-word-265-count.jsonl\n", + "Generated JSONL file with - 345 max words, 50 samples - at ./dataset/gen-word-345-count.jsonl\n", + "Generated a single JSONL file with 466 samples (50 token repeat) - 255 max words - at ./dataset/shuffle-word-255-count.jsonl\n", + "Generated JSONL file with - 350 max words, 50 samples - at ./dataset/gen-word-350-count.jsonl\n", + "Generated a single JSONL file with 1027 samples (50 token repeat) - 105 max words - at ./dataset/shuffle-word-105-count.jsonl\n", + "Generated JSONL file with - 355 max words, 50 samples - at ./dataset/gen-word-355-count.jsonl\n", + "Generated JSONL file with - 490 max words, 50 samples - at ./dataset/gen-word-490-count.jsonl\n", + "Generated JSONL file with - 390 max words, 50 samples - at 
./dataset/gen-word-390-count.jsonl\n", + "Generated a single JSONL file with 920 samples (50 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", + "Generated JSONL file with - 430 max words, 50 samples - at ./dataset/gen-word-430-count.jsonl\n", + "Generated JSONL file with - 380 max words, 50 samples - at ./dataset/gen-word-380-count.jsonl\n", + "Generated JSONL file with - 375 max words, 50 samples - at ./dataset/gen-word-375-count.jsonl\n", + "Generated JSONL file with - 440 max words, 50 samples - at ./dataset/gen-word-440-count.jsonl\n", + "Generated JSONL file with - 385 max words, 50 samples - at ./dataset/gen-word-385-count.jsonl\n", + "Generated JSONL file with - 285 max words, 50 samples - at ./dataset/gen-word-285-count.jsonl\n", + "Generated JSONL file with - 435 max words, 50 samples - at ./dataset/gen-word-435-count.jsonl\n", + "Generated JSONL file with - 295 max words, 50 samples - at ./dataset/gen-word-295-count.jsonl\n", + "Generated JSONL file with - 395 max words, 50 samples - at ./dataset/gen-word-395-count.jsonl\n", + "Generated a single JSONL file with 737 samples (50 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", + "Generated JSONL file with - 500 max words, 50 samples - at ./dataset/gen-word-500-count.jsonl\n", + "Generated JSONL file with - 545 max words, 50 samples - at ./dataset/gen-word-545-count.jsonl\n", + "Generated JSONL file with - 580 max words, 50 samples - at ./dataset/gen-word-580-count.jsonl\n", + "Generated JSONL file with - 405 max words, 50 samples - at ./dataset/gen-word-405-count.jsonl\n", + "Generated JSONL file with - 420 max words, 50 samples - at ./dataset/gen-word-420-count.jsonl\n", + "Generated a single JSONL file with 469 samples (50 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", + "Generated JSONL file with - 400 max words, 50 samples - at ./dataset/gen-word-400-count.jsonl\n", + "Generated JSONL file with - 320 max words, 50 samples - at ./dataset/gen-word-320-count.jsonl\n", + "Generated JSONL file with - 555 max words, 50 samples - at ./dataset/gen-word-555-count.jsonl\n", + "Generated a single JSONL file with 459 samples (50 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", + "Generated JSONL file with - 540 max words, 50 samples - at ./dataset/gen-word-540-count.jsonl\n", + "Generated JSONL file with - 515 max words, 50 samples - at ./dataset/gen-word-515-count.jsonl\n", + "Generated JSONL file with - 585 max words, 50 samples - at ./dataset/gen-word-585-count.jsonl\n", + "Generated JSONL file with - 475 max words, 50 samples - at ./dataset/gen-word-475-count.jsonl\n", + "Generated JSONL file with - 645 max words, 50 samples - at ./dataset/gen-word-645-count.jsonl\n", + "Generated a single JSONL file with 842 samples (50 token repeat) - 135 max words - at ./dataset/shuffle-word-135-count.jsonl\n", + "Generated JSONL file with - 530 max words, 50 samples - at ./dataset/gen-word-530-count.jsonl\n", + "Generated JSONL file with - 525 max words, 50 samples - at ./dataset/gen-word-525-count.jsonl\n", + "Generated JSONL file with - 510 max words, 50 samples - at ./dataset/gen-word-510-count.jsonl\n", + "Generated JSONL file with - 410 max words, 50 samples - at ./dataset/gen-word-410-count.jsonl\n", + "Generated a single JSONL file with 295 samples (50 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", + "Generated a single JSONL file with 459 samples (50 token repeat) - 290 max words - at 
./dataset/shuffle-word-290-count.jsonl\n", + "Generated JSONL file with - 325 max words, 50 samples - at ./dataset/gen-word-325-count.jsonl\n", + "Generated JSONL file with - 570 max words, 50 samples - at ./dataset/gen-word-570-count.jsonl\n", + "Generated JSONL file with - 655 max words, 50 samples - at ./dataset/gen-word-655-count.jsonl\n", + "Generated JSONL file with - 535 max words, 50 samples - at ./dataset/gen-word-535-count.jsonl\n", + "Generated JSONL file with - 445 max words, 50 samples - at ./dataset/gen-word-445-count.jsonl\n", + "Generated a single JSONL file with 364 samples (50 token repeat) - 305 max words - at ./dataset/shuffle-word-305-count.jsonl\n", + "Generated JSONL file with - 610 max words, 50 samples - at ./dataset/gen-word-610-count.jsonl\n", + "Generated JSONL file with - 520 max words, 50 samples - at ./dataset/gen-word-520-count.jsonl\n", + "Generated JSONL file with - 365 max words, 50 samples - at ./dataset/gen-word-365-count.jsonl\n", + "Generated a single JSONL file with 350 samples (50 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", + "Generated a single JSONL file with 351 samples (50 token repeat) - 345 max words - at ./dataset/shuffle-word-345-count.jsonl\n", + "Generated a single JSONL file with 988 samples (50 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", + "Generated a single JSONL file with 791 samples (50 token repeat) - 145 max words - at ./dataset/shuffle-word-145-count.jsonl\n", + "Generated JSONL file with - 605 max words, 50 samples - at ./dataset/gen-word-605-count.jsonl\n", + "Generated a single JSONL file with 299 samples (50 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", + "Generated a single JSONL file with 349 samples (50 token repeat) - 385 max words - at ./dataset/shuffle-word-385-count.jsonl\n", + "Generated a single JSONL file with 772 samples (50 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", + "Generated a single JSONL file with 809 samples (50 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", + "Generated a single JSONL file with 702 samples (50 token repeat) - 175 max words - at ./dataset/shuffle-word-175-count.jsonl\n", + "Generated a single JSONL file with 862 samples (50 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", + "Generated a single JSONL file with 693 samples (50 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", + "Generated a single JSONL file with 894 samples (50 token repeat) - 125 max words - at ./dataset/shuffle-word-125-count.jsonl\n", + "Generated JSONL file with - 650 max words, 50 samples - at ./dataset/gen-word-650-count.jsonl\n", + "Generated a single JSONL file with 544 samples (50 token repeat) - 205 max words - at ./dataset/shuffle-word-205-count.jsonl\n", + "Generated JSONL file with - 600 max words, 50 samples - at ./dataset/gen-word-600-count.jsonl\n", + "Generated a single JSONL file with 507 samples (50 token repeat) - 215 max words - at ./dataset/shuffle-word-215-count.jsonl\n", + "Generated JSONL file with - 615 max words, 50 samples - at ./dataset/gen-word-615-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 535 max words - at ./dataset/shuffle-word-535-count.jsonl\n", + "Generated a single JSONL file with 681 samples (50 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", + "Generated a single JSONL file with 720 samples (50 
token repeat) - 165 max words - at ./dataset/shuffle-word-165-count.jsonl\n", + "Generated a single JSONL file with 504 samples (50 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", + "Generated a single JSONL file with 499 samples (50 token repeat) - 225 max words - at ./dataset/shuffle-word-225-count.jsonl\n", + "Generated JSONL file with - 495 max words, 50 samples - at ./dataset/gen-word-495-count.jsonl\n", + "Generated a single JSONL file with 479 samples (50 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", + "Generated a single JSONL file with 680 samples (50 token repeat) - 195 max words - at ./dataset/shuffle-word-195-count.jsonl\n", + "Generated JSONL file with - 630 max words, 50 samples - at ./dataset/gen-word-630-count.jsonl\n", + "Generated a single JSONL file with 521 samples (50 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", + "Generated a single JSONL file with 494 samples (50 token repeat) - 235 max words - at ./dataset/shuffle-word-235-count.jsonl\n", + "Generated JSONL file with - 620 max words, 50 samples - at ./dataset/gen-word-620-count.jsonl\n", + "Generated a single JSONL file with 685 samples (50 token repeat) - 185 max words - at ./dataset/shuffle-word-185-count.jsonl\n", + "Generated a single JSONL file with 499 samples (50 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", + "Generated JSONL file with - 640 max words, 50 samples - at ./dataset/gen-word-640-count.jsonl\n", + "Generated a single JSONL file with 461 samples (50 token repeat) - 275 max words - at ./dataset/shuffle-word-275-count.jsonl\n", + "Generated a single JSONL file with 666 samples (50 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", + "Generated a single JSONL file with 473 samples (50 token repeat) - 245 max words - at ./dataset/shuffle-word-245-count.jsonl\n", + "Generated a single JSONL file with 352 samples (50 token repeat) - 325 max words - at ./dataset/shuffle-word-325-count.jsonl\n", + "Generated JSONL file with - 470 max words, 50 samples - at ./dataset/gen-word-470-count.jsonl\n", + "Generated JSONL file with - 725 max words, 50 samples - at ./dataset/gen-word-725-count.jsonl\n", + "Generated a single JSONL file with 748 samples (50 token repeat) - 155 max words - at ./dataset/shuffle-word-155-count.jsonl\n", + "Generated a single JSONL file with 456 samples (50 token repeat) - 295 max words - at ./dataset/shuffle-word-295-count.jsonl\n", + "Generated a single JSONL file with 456 samples (50 token repeat) - 285 max words - at ./dataset/shuffle-word-285-count.jsonl\n", + "Generated a single JSONL file with 458 samples (50 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", + "Generated JSONL file with - 705 max words, 50 samples - at ./dataset/gen-word-705-count.jsonl\n", + "Generated a single JSONL file with 353 samples (50 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", + "Generated JSONL file with - 415 max words, 50 samples - at ./dataset/gen-word-415-count.jsonl\n", + "Generated a single JSONL file with 290 samples (50 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", + "Generated JSONL file with - 465 max words, 50 samples - at ./dataset/gen-word-465-count.jsonl\n", + "Generated JSONL file with - 450 max words, 50 samples - at ./dataset/gen-word-450-count.jsonl\n", + "Generated JSONL file with - 670 max words, 50 samples - at 
./dataset/gen-word-670-count.jsonl\n", + "Generated JSONL file with - 505 max words, 50 samples - at ./dataset/gen-word-505-count.jsonl\n", + "Generated a single JSONL file with 353 samples (50 token repeat) - 355 max words - at ./dataset/shuffle-word-355-count.jsonl\n", + "Generated JSONL file with - 560 max words, 50 samples - at ./dataset/gen-word-560-count.jsonl\n", + "Generated JSONL file with - 460 max words, 50 samples - at ./dataset/gen-word-460-count.jsonl\n", + "Generated a single JSONL file with 352 samples (50 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", + "Generated a single JSONL file with 293 samples (50 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", + "Generated a single JSONL file with 351 samples (50 token repeat) - 395 max words - at ./dataset/shuffle-word-395-count.jsonl\n", + "Generated JSONL file with - 455 max words, 50 samples - at ./dataset/gen-word-455-count.jsonl\n", + "Generated a single JSONL file with 295 samples (50 token repeat) - 445 max words - at ./dataset/shuffle-word-445-count.jsonl\n", + "Generated JSONL file with - 710 max words, 50 samples - at ./dataset/gen-word-710-count.jsonl\n", + "Generated a single JSONL file with 296 samples (50 token repeat) - 435 max words - at ./dataset/shuffle-word-435-count.jsonl\n", + "Generated JSONL file with - 715 max words, 50 samples - at ./dataset/gen-word-715-count.jsonl\n", + "Generated a single JSONL file with 249 samples (50 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", + "Generated JSONL file with - 735 max words, 50 samples - at ./dataset/gen-word-735-count.jsonl\n", + "Generated a single JSONL file with 299 samples (50 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", + "Generated JSONL file with - 660 max words, 50 samples - at ./dataset/gen-word-660-count.jsonl\n", + "Generated JSONL file with - 485 max words, 50 samples - at ./dataset/gen-word-485-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 615 max words - at ./dataset/shuffle-word-615-count.jsonl\n", + "Generated JSONL file with - 910 max words, 50 samples - at ./dataset/gen-word-910-count.jsonl\n", + "Generated a single JSONL file with 299 samples (50 token repeat) - 405 max words - at ./dataset/shuffle-word-405-count.jsonl\n", + "Generated a single JSONL file with 199 samples (50 token repeat) - 755 max words - at ./dataset/shuffle-word-755-count.jsonl\n", + "Generated JSONL file with - 665 max words, 50 samples - at ./dataset/gen-word-665-count.jsonl\n", + "Generated JSONL file with - 480 max words, 50 samples - at ./dataset/gen-word-480-count.jsonl\n", + "Generated JSONL file with - 565 max words, 50 samples - at ./dataset/gen-word-565-count.jsonl\n", + "Generated JSONL file with - 690 max words, 50 samples - at ./dataset/gen-word-690-count.jsonl\n", + "Generated a single JSONL file with 199 samples (50 token repeat) - 745 max words - at ./dataset/shuffle-word-745-count.jsonl\n", + "Generated JSONL file with - 590 max words, 50 samples - at ./dataset/gen-word-590-count.jsonl\n", + "Generated a single JSONL file with 291 samples (50 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", + "Generated a single JSONL file with 294 samples (50 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", + "Generated a single JSONL file with 298 samples (50 token repeat) - 425 max words - at ./dataset/shuffle-word-425-count.jsonl\n", + "Generated a 
single JSONL file with 249 samples (50 token repeat) - 575 max words - at ./dataset/shuffle-word-575-count.jsonl\n", + "Generated JSONL file with - 825 max words, 50 samples - at ./dataset/gen-word-825-count.jsonl\n", + "Generated JSONL file with - 785 max words, 50 samples - at ./dataset/gen-word-785-count.jsonl\n", + "Generated JSONL file with - 770 max words, 50 samples - at ./dataset/gen-word-770-count.jsonl\n", + "Generated a single JSONL file with 296 samples (50 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonl\n", + "Generated a single JSONL file with 199 samples (50 token repeat) - 725 max words - at ./dataset/shuffle-word-725-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 595 max words - at ./dataset/shuffle-word-595-count.jsonl\n", + "Generated JSONL file with - 760 max words, 50 samples - at ./dataset/gen-word-760-count.jsonl\n", + "Generated a single JSONL file with 295 samples (50 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", + "Generated a single JSONL file with 202 samples (50 token repeat) - 635 max words - at ./dataset/shuffle-word-635-count.jsonl\n", + "Generated JSONL file with - 810 max words, 50 samples - at ./dataset/gen-word-810-count.jsonl\n", + "Generated JSONL file with - 855 max words, 50 samples - at ./dataset/gen-word-855-count.jsonl\n", + "Generated a single JSONL file with 201 samples (50 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", + "Generated JSONL file with - 595 max words, 50 samples - at ./dataset/gen-word-595-count.jsonl\n", + "Generated JSONL file with - 900 max words, 50 samples - at ./dataset/gen-word-900-count.jsonl\n", + "Generated JSONL file with - 625 max words, 50 samples - at ./dataset/gen-word-625-count.jsonl\n", + "Generated a single JSONL file with 199 samples (50 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", + "Generated a single JSONL file with 458 samples (50 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", + "Generated JSONL file with - 685 max words, 50 samples - at ./dataset/gen-word-685-count.jsonl\n", + "Generated JSONL file with - 800 max words, 50 samples - at ./dataset/gen-word-800-count.jsonl\n", + "Generated JSONL file with - 635 max words, 50 samples - at ./dataset/gen-word-635-count.jsonl\n", + "Generated a single JSONL file with 199 samples (50 token repeat) - 715 max words - at ./dataset/shuffle-word-715-count.jsonl\n", + "Generated a single JSONL file with 248 samples (50 token repeat) - 565 max words - at ./dataset/shuffle-word-565-count.jsonl\n", + "Generated JSONL file with - 550 max words, 50 samples - at ./dataset/gen-word-550-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 505 max words - at ./dataset/shuffle-word-505-count.jsonl\n", + "Generated a single JSONL file with 352 samples (50 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", + "Generated JSONL file with - 575 max words, 50 samples - at ./dataset/gen-word-575-count.jsonl\n", + "Generated a single JSONL file with 201 samples (50 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", + "Generated JSONL file with - 815 max words, 50 samples - at ./dataset/gen-word-815-count.jsonl\n", + "Generated a single JSONL file with 249 samples (50 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", + "Generated JSONL file with - 895 max words, 50 samples - at 
./dataset/gen-word-895-count.jsonl\n", + "Generated a single JSONL file with 249 samples (50 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", + "Generated JSONL file with - 750 max words, 50 samples - at ./dataset/gen-word-750-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 585 max words - at ./dataset/shuffle-word-585-count.jsonl\n", + "Generated a single JSONL file with 295 samples (50 token repeat) - 495 max words - at ./dataset/shuffle-word-495-count.jsonl\n", + "Generated a single JSONL file with 249 samples (50 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 685 max words - at ./dataset/shuffle-word-685-count.jsonl\n", + "Generated JSONL file with - 920 max words, 50 samples - at ./dataset/gen-word-920-count.jsonl\n", + "Generated JSONL file with - 740 max words, 50 samples - at ./dataset/gen-word-740-count.jsonl\n", + "Generated a single JSONL file with 249 samples (50 token repeat) - 600 max words - at ./dataset/shuffle-word-600-count.jsonl\n", + "Generated JSONL file with - 905 max words, 50 samples - at ./dataset/gen-word-905-count.jsonl\n", + "Generated JSONL file with - 680 max words, 50 samples - at ./dataset/gen-word-680-count.jsonl\n", + "Generated a single JSONL file with 353 samples (50 token repeat) - 335 max words - at ./dataset/shuffle-word-335-count.jsonl\n", + "Generated JSONL file with - 695 max words, 50 samples - at ./dataset/gen-word-695-count.jsonl\n", + "Generated JSONL file with - 720 max words, 50 samples - at ./dataset/gen-word-720-count.jsonl\n", + "Generated a single JSONL file with 290 samples (50 token repeat) - 465 max words - at ./dataset/shuffle-word-465-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", + "Generated a single JSONL file with 294 samples (50 token repeat) - 485 max words - at ./dataset/shuffle-word-485-count.jsonl\n", + "Generated JSONL file with - 700 max words, 50 samples - at ./dataset/gen-word-700-count.jsonl\n", + "Generated a single JSONL file with 249 samples (50 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonl\n", + "Generated JSONL file with - 730 max words, 50 samples - at ./dataset/gen-word-730-count.jsonl\n", + "Generated JSONL file with - 780 max words, 50 samples - at ./dataset/gen-word-780-count.jsonl\n", + "Generated JSONL file with - 765 max words, 50 samples - at ./dataset/gen-word-765-count.jsonl\n", + "Generated JSONL file with - 1020 max words, 50 samples - at ./dataset/gen-word-1020-count.jsonl\n", + "Generated JSONL file with - 930 max words, 50 samples - at ./dataset/gen-word-930-count.jsonl\n", + "Generated JSONL file with - 935 max words, 50 samples - at ./dataset/gen-word-935-count.jsonl\n", + "Generated a single JSONL file with 159 samples (50 token repeat) - 805 max words - at ./dataset/shuffle-word-805-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 545 max words - at ./dataset/shuffle-word-545-count.jsonl\n", + "Generated JSONL file with - 1100 max words, 50 samples - at ./dataset/gen-word-1100-count.jsonl\n", + "Generated a single JSONL file with 154 samples (50 token repeat) - 815 max words - at ./dataset/shuffle-word-815-count.jsonl\n", + "Generated a single JSONL file with 163 samples (50 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", + "Generated a single 
JSONL file with 150 samples (50 token repeat) - 905 max words - at ./dataset/shuffle-word-905-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", + "Generated JSONL file with - 835 max words, 50 samples - at ./dataset/gen-word-835-count.jsonl\n", + "Generated a single JSONL file with 351 samples (50 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", + "Generated JSONL file with - 1120 max words, 50 samples - at ./dataset/gen-word-1120-count.jsonl\n", + "Generated JSONL file with - 840 max words, 50 samples - at ./dataset/gen-word-840-count.jsonl\n", + "Generated a single JSONL file with 357 samples (50 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", + "Generated JSONL file with - 675 max words, 50 samples - at ./dataset/gen-word-675-count.jsonl\n", + "Generated a single JSONL file with 203 samples (50 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", + "Generated a single JSONL file with 203 samples (50 token repeat) - 655 max words - at ./dataset/shuffle-word-655-count.jsonl\n", + "Generated a single JSONL file with 349 samples (50 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", + "Generated JSONL file with - 745 max words, 50 samples - at ./dataset/gen-word-745-count.jsonl\n", + "Generated JSONL file with - 755 max words, 50 samples - at ./dataset/gen-word-755-count.jsonl\n", + "Generated a single JSONL file with 157 samples (50 token repeat) - 845 max words - at ./dataset/shuffle-word-845-count.jsonl\n", + "Generated a single JSONL file with 197 samples (50 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", + "Generated JSONL file with - 875 max words, 50 samples - at ./dataset/gen-word-875-count.jsonl\n", + "Generated JSONL file with - 965 max words, 50 samples - at ./dataset/gen-word-965-count.jsonl\n", + "Generated JSONL file with - 820 max words, 50 samples - at ./dataset/gen-word-820-count.jsonl\n", + "Generated JSONL file with - 1055 max words, 50 samples - at ./dataset/gen-word-1055-count.jsonl\n", + "Generated a single JSONL file with 351 samples (50 token repeat) - 365 max words - at ./dataset/shuffle-word-365-count.jsonl\n", + "Generated JSONL file with - 955 max words, 50 samples - at ./dataset/gen-word-955-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 625 max words - at ./dataset/shuffle-word-625-count.jsonl\n", + "Generated a single JSONL file with 148 samples (50 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", + "Generated JSONL file with - 790 max words, 50 samples - at ./dataset/gen-word-790-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonl\n", + "Generated JSONL file with - 850 max words, 50 samples - at ./dataset/gen-word-850-count.jsonl\n", + "Generated JSONL file with - 885 max words, 50 samples - at ./dataset/gen-word-885-count.jsonl\n", + "Generated JSONL file with - 1060 max words, 50 samples - at ./dataset/gen-word-1060-count.jsonl\n", + "Generated JSONL file with - 1005 max words, 50 samples - at ./dataset/gen-word-1005-count.jsonl\n", + "Generated JSONL file with - 805 max words, 50 samples - at ./dataset/gen-word-805-count.jsonl\n", + "Generated a single JSONL file with 161 samples (50 token repeat) - 830 max words - at 
./dataset/shuffle-word-830-count.jsonl\n", + "Generated JSONL file with - 1105 max words, 50 samples - at ./dataset/gen-word-1105-count.jsonl\n", + "Generated JSONL file with - 1040 max words, 50 samples - at ./dataset/gen-word-1040-count.jsonl\n", + "Generated JSONL file with - 845 max words, 50 samples - at ./dataset/gen-word-845-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", + "Generated a single JSONL file with 349 samples (50 token repeat) - 375 max words - at ./dataset/shuffle-word-375-count.jsonl\n", + "Generated JSONL file with - 795 max words, 50 samples - at ./dataset/gen-word-795-count.jsonl\n", + "Generated a single JSONL file with 351 samples (50 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", + "Generated JSONL file with - 775 max words, 50 samples - at ./dataset/gen-word-775-count.jsonl\n", + "Generated a single JSONL file with 354 samples (50 token repeat) - 315 max words - at ./dataset/shuffle-word-315-count.jsonl\n", + "Generated JSONL file with - 830 max words, 50 samples - at ./dataset/gen-word-830-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1015 max words - at ./dataset/shuffle-word-1015-count.jsonl\n", + "Generated JSONL file with - 1085 max words, 50 samples - at ./dataset/gen-word-1085-count.jsonl\n", + "Generated JSONL file with - 1045 max words, 50 samples - at ./dataset/gen-word-1045-count.jsonl\n", + "Generated a single JSONL file with 154 samples (50 token repeat) - 895 max words - at ./dataset/shuffle-word-895-count.jsonl\n", + "Generated a single JSONL file with 351 samples (50 token repeat) - 390 max words - at ./dataset/shuffle-word-390-count.jsonl\n", + "Generated a single JSONL file with 350 samples (50 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", + "Generated a single JSONL file with 199 samples (50 token repeat) - 705 max words - at ./dataset/shuffle-word-705-count.jsonl\n", + "Generated JSONL file with - 980 max words, 50 samples - at ./dataset/gen-word-980-count.jsonl\n", + "Generated JSONL file with - 1080 max words, 50 samples - at ./dataset/gen-word-1080-count.jsonl\n", + "Generated JSONL file with - 1155 max words, 50 samples - at ./dataset/gen-word-1155-count.jsonl\n", + "Generated JSONL file with - 1090 max words, 50 samples - at ./dataset/gen-word-1090-count.jsonl\n", + "Generated a single JSONL file with 295 samples (50 token repeat) - 455 max words - at ./dataset/shuffle-word-455-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 515 max words - at ./dataset/shuffle-word-515-count.jsonl\n", + "Generated JSONL file with - 860 max words, 50 samples - at ./dataset/gen-word-860-count.jsonl\n", + "Generated JSONL file with - 985 max words, 50 samples - at ./dataset/gen-word-985-count.jsonl\n", + "Generated a single JSONL file with 157 samples (50 token repeat) - 885 max words - at ./dataset/shuffle-word-885-count.jsonl\n", + "Generated a single JSONL file with 202 samples (50 token repeat) - 645 max words - at ./dataset/shuffle-word-645-count.jsonl\n", + "Generated JSONL file with - 950 max words, 50 samples - at ./dataset/gen-word-950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1005 max words - at ./dataset/shuffle-word-1005-count.jsonl\n", + "Generated JSONL file with - 870 max words, 50 samples - at ./dataset/gen-word-870-count.jsonl\n", + "Generated a single JSONL 
file with 198 samples (50 token repeat) - 785 max words - at ./dataset/shuffle-word-785-count.jsonl\n", + "Generated JSONL file with - 1035 max words, 50 samples - at ./dataset/gen-word-1035-count.jsonl\n", + "Generated JSONL file with - 1185 max words, 50 samples - at ./dataset/gen-word-1185-count.jsonl\n", + "Generated a single JSONL file with 248 samples (50 token repeat) - 525 max words - at ./dataset/shuffle-word-525-count.jsonl\n", + "Generated JSONL file with - 925 max words, 50 samples - at ./dataset/gen-word-925-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 555 max words - at ./dataset/shuffle-word-555-count.jsonl\n", + "Generated JSONL file with - 1195 max words, 50 samples - at ./dataset/gen-word-1195-count.jsonl\n", + "Generated a single JSONL file with 160 samples (50 token repeat) - 855 max words - at ./dataset/shuffle-word-855-count.jsonl\n", + "Generated a single JSONL file with 295 samples (50 token repeat) - 475 max words - at ./dataset/shuffle-word-475-count.jsonl\n", + "Generated JSONL file with - 1115 max words, 50 samples - at ./dataset/gen-word-1115-count.jsonl\n", + "Generated JSONL file with - 1015 max words, 50 samples - at ./dataset/gen-word-1015-count.jsonl\n", + "Generated JSONL file with - 1220 max words, 50 samples - at ./dataset/gen-word-1220-count.jsonl\n", + "Generated JSONL file with - 1175 max words, 50 samples - at ./dataset/gen-word-1175-count.jsonl\n", + "Generated JSONL file with - 865 max words, 50 samples - at ./dataset/gen-word-865-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1105 max words - at ./dataset/shuffle-word-1105-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", + "Generated a single JSONL file with 201 samples (50 token repeat) - 675 max words - at ./dataset/shuffle-word-675-count.jsonl\n", + "Generated JSONL file with - 1140 max words, 50 samples - at ./dataset/gen-word-1140-count.jsonl\n", + "Generated a single JSONL file with 203 samples (50 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", + "Generated JSONL file with - 880 max words, 50 samples - at ./dataset/gen-word-880-count.jsonl\n", + "Generated a single JSONL file with 157 samples (50 token repeat) - 835 max words - at ./dataset/shuffle-word-835-count.jsonl\n", + "Generated JSONL file with - 940 max words, 50 samples - at ./dataset/gen-word-940-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1075 max words - at ./dataset/shuffle-word-1075-count.jsonl\n", + "Generated JSONL file with - 890 max words, 50 samples - at ./dataset/gen-word-890-count.jsonl\n", + "Generated a single JSONL file with 159 samples (50 token repeat) - 825 max words - at ./dataset/shuffle-word-825-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", + "Generated a single JSONL file with 300 samples (50 token repeat) - 415 max words - at ./dataset/shuffle-word-415-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1135 max words - at ./dataset/shuffle-word-1135-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", + "Generated JSONL file with - 995 max words, 50 samples - at ./dataset/gen-word-995-count.jsonl\n", + "Generated a single JSONL file 
with 200 samples (50 token repeat) - 765 max words - at ./dataset/shuffle-word-765-count.jsonl\n", + "Generated a single JSONL file with 201 samples (50 token repeat) - 665 max words - at ./dataset/shuffle-word-665-count.jsonl\n", + "Generated JSONL file with - 1025 max words, 50 samples - at ./dataset/gen-word-1025-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1085 max words - at ./dataset/shuffle-word-1085-count.jsonl\n", + "Generated a single JSONL file with 201 samples (50 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", + "Generated a single JSONL file with 160 samples (50 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", + "Generated JSONL file with - 915 max words, 50 samples - at ./dataset/gen-word-915-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", + "Generated a single JSONL file with 149 samples (50 token repeat) - 1185 max words - at ./dataset/shuffle-word-1185-count.jsonl\n", + "Generated a single JSONL file with 161 samples (50 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", + "Generated JSONL file with - 990 max words, 50 samples - at ./dataset/gen-word-990-count.jsonl\n", + "Generated a single JSONL file with 157 samples (50 token repeat) - 875 max words - at ./dataset/shuffle-word-875-count.jsonl\n", + "Generated JSONL file with - 960 max words, 50 samples - at ./dataset/gen-word-960-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 915 max words - at ./dataset/shuffle-word-915-count.jsonl\n", + "Generated JSONL file with - 1250 max words, 50 samples - at ./dataset/gen-word-1250-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", + "Generated JSONL file with - 945 max words, 50 samples - at ./dataset/gen-word-945-count.jsonl\n", + "Generated a single JSONL file with 292 samples (50 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", + "Generated JSONL file with - 975 max words, 50 samples - at ./dataset/gen-word-975-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1065 max words - at ./dataset/shuffle-word-1065-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 775 max words - at ./dataset/shuffle-word-775-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 695 max words - at ./dataset/shuffle-word-695-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", + "Generated a single JSONL file with 250 samples (50 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", + "Generated JSONL file with - 1030 max words, 50 samples - at 
./dataset/gen-word-1030-count.jsonl\n", + "Generated JSONL file with - 1050 max words, 50 samples - at ./dataset/gen-word-1050-count.jsonl\n", + "Generated JSONL file with - 1170 max words, 50 samples - at ./dataset/gen-word-1170-count.jsonl\n", + "Generated JSONL file with - 970 max words, 50 samples - at ./dataset/gen-word-970-count.jsonl\n", + "Generated JSONL file with - 1365 max words, 50 samples - at ./dataset/gen-word-1365-count.jsonl\n", + "Generated a single JSONL file with 149 samples (50 token repeat) - 1195 max words - at ./dataset/shuffle-word-1195-count.jsonl\n", + "Generated JSONL file with - 1130 max words, 50 samples - at ./dataset/gen-word-1130-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", + "Generated a single JSONL file with 162 samples (50 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", + "Generated JSONL file with - 1255 max words, 50 samples - at ./dataset/gen-word-1255-count.jsonl\n", + "Generated JSONL file with - 1110 max words, 50 samples - at ./dataset/gen-word-1110-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", + "Generated JSONL file with - 1230 max words, 50 samples - at ./dataset/gen-word-1230-count.jsonl\n", + "Generated JSONL file with - 1290 max words, 50 samples - at ./dataset/gen-word-1290-count.jsonl\n", + "Generated a single JSONL file with 123 samples (50 token repeat) - 1285 max words - at ./dataset/shuffle-word-1285-count.jsonl\n", + "Generated a single JSONL file with 199 samples (50 token repeat) - 735 max words - at ./dataset/shuffle-word-735-count.jsonl\n", + "Generated JSONL file with - 1135 max words, 50 samples - at ./dataset/gen-word-1135-count.jsonl\n", + "Generated JSONL file with - 1095 max words, 50 samples - at ./dataset/gen-word-1095-count.jsonl\n", + "Generated a single JSONL file with 159 samples (50 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1190 max words - at ./dataset/shuffle-word-1190-count.jsonl\n", + "Generated a single JSONL file with 197 samples (50 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 955 max words - at ./dataset/shuffle-word-955-count.jsonl\n", + "Generated a single JSONL file with 201 samples (50 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", + "Generated a single JSONL file with 159 samples (50 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", + "Generated JSONL file with - 1355 max words, 50 samples - at ./dataset/gen-word-1355-count.jsonl\n", + "Generated JSONL file with - 1070 max words, 50 samples - at ./dataset/gen-word-1070-count.jsonl\n", + "Generated JSONL file with - 1065 max words, 50 samples - at ./dataset/gen-word-1065-count.jsonl\n", + "Generated JSONL file with - 1000 max words, 50 samples - at ./dataset/gen-word-1000-count.jsonl\n", + "Generated JSONL file with - 1240 max words, 50 samples - at ./dataset/gen-word-1240-count.jsonl\n", + "Generated 
a single JSONL file with 160 samples (50 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", + "Generated a single JSONL file with 201 samples (50 token repeat) - 605 max words - at ./dataset/shuffle-word-605-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", + "Generated JSONL file with - 1145 max words, 50 samples - at ./dataset/gen-word-1145-count.jsonl\n", + "Generated JSONL file with - 1125 max words, 50 samples - at ./dataset/gen-word-1125-count.jsonl\n", + "Generated JSONL file with - 1370 max words, 50 samples - at ./dataset/gen-word-1370-count.jsonl\n", + "Generated a single JSONL file with 202 samples (50 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", + "Generated a single JSONL file with 123 samples (50 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1315 max words - at ./dataset/shuffle-word-1315-count.jsonl\n", + "Generated JSONL file with - 1150 max words, 50 samples - at ./dataset/gen-word-1150-count.jsonl\n", + "Generated JSONL file with - 1010 max words, 50 samples - at ./dataset/gen-word-1010-count.jsonl\n", + "Generated a single JSONL file with 159 samples (50 token repeat) - 865 max words - at ./dataset/shuffle-word-865-count.jsonl\n", + "Generated JSONL file with - 1180 max words, 50 samples - at ./dataset/gen-word-1180-count.jsonl\n", + "Generated JSONL file with - 1315 max words, 50 samples - at ./dataset/gen-word-1315-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", + "Generated JSONL file with - 1275 max words, 50 samples - at ./dataset/gen-word-1275-count.jsonl\n", + "Generated a single JSONL file with 199 samples (50 token repeat) - 795 max words - at ./dataset/shuffle-word-795-count.jsonl\n", + "Generated JSONL file with - 1385 max words, 50 samples - at ./dataset/gen-word-1385-count.jsonl\n", + "Generated JSONL file with - 1165 max words, 50 samples - at ./dataset/gen-word-1165-count.jsonl\n", + "Generated JSONL file with - 1270 max words, 50 samples - at ./dataset/gen-word-1270-count.jsonl\n", + "Generated a single JSONL file with 149 samples (50 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", + "Generated a single JSONL file with 158 samples (50 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", + "Generated JSONL file with - 1160 max words, 50 samples - at ./dataset/gen-word-1160-count.jsonl\n", + "Generated JSONL file with - 1300 max words, 50 samples - at ./dataset/gen-word-1300-count.jsonl\n", + "Generated a single JSONL file with 149 samples (50 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", + "Generated JSONL file with - 1380 max words, 50 samples - at ./dataset/gen-word-1380-count.jsonl\n", + "Generated JSONL file with - 1190 max words, 50 samples - at ./dataset/gen-word-1190-count.jsonl\n", + "Generated a single JSONL file with 159 samples (50 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 945 max words - at ./dataset/shuffle-word-945-count.jsonl\n", + "Generated JSONL file with - 
1375 max words, 50 samples - at ./dataset/gen-word-1375-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 935 max words - at ./dataset/shuffle-word-935-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 925 max words - at ./dataset/shuffle-word-925-count.jsonl\n", + "Generated a single JSONL file with 129 samples (50 token repeat) - 1265 max words - at ./dataset/shuffle-word-1265-count.jsonl\n", + "Generated a single JSONL file with 200 samples (50 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1115 max words - at ./dataset/shuffle-word-1115-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1055 max words - at ./dataset/shuffle-word-1055-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", + "Generated JSONL file with - 1245 max words, 50 samples - at ./dataset/gen-word-1245-count.jsonl\n", + "Generated JSONL file with - 1485 max words, 50 samples - at ./dataset/gen-word-1485-count.jsonl\n", + "Generated a single JSONL file with 128 samples (50 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", + "Generated JSONL file with - 1345 max words, 50 samples - at ./dataset/gen-word-1345-count.jsonl\n", + "Generated a single JSONL file with 122 samples (50 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", + "Generated a single JSONL file with 101 samples (50 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", + "Generated a single JSONL file with 102 samples (50 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", + "Generated a single JSONL file with 198 samples (50 token repeat) - 800 max words - at ./dataset/shuffle-word-800-count.jsonl\n", + "Generated a single JSONL file with 149 samples (50 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", + "Generated a single JSONL file with 149 samples (50 token repeat) - 1125 max words - at ./dataset/shuffle-word-1125-count.jsonl\n", + "Generated JSONL file with - 1405 max words, 50 samples - at ./dataset/gen-word-1405-count.jsonl\n", + "Generated JSONL file with - 1305 max words, 50 samples - at ./dataset/gen-word-1305-count.jsonl\n", + "Generated a single JSONL file with 101 samples (50 token repeat) - 1345 max words - at ./dataset/shuffle-word-1345-count.jsonl\n", + "Generated a single JSONL file with 104 samples (50 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", + "Generated a single JSONL file with 101 samples (50 token repeat) - 1375 max words - at ./dataset/shuffle-word-1375-count.jsonl\n", + "Generated a single JSONL file with 135 samples (50 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1025 max words - at ./dataset/shuffle-word-1025-count.jsonl\n", + "Generated a single JSONL 
file with 150 samples (50 token repeat) - 965 max words - at ./dataset/shuffle-word-965-count.jsonl\n", + "Generated JSONL file with - 1265 max words, 50 samples - at ./dataset/gen-word-1265-count.jsonl\n", + "Generated JSONL file with - 1200 max words, 50 samples - at ./dataset/gen-word-1200-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", + "Generated JSONL file with - 1205 max words, 50 samples - at ./dataset/gen-word-1205-count.jsonl\n", + "Generated JSONL file with - 1210 max words, 50 samples - at ./dataset/gen-word-1210-count.jsonl\n", + "Generated a single JSONL file with 125 samples (50 token repeat) - 1235 max words - at ./dataset/shuffle-word-1235-count.jsonl\n", + "Generated a single JSONL file with 149 samples (50 token repeat) - 1165 max words - at ./dataset/shuffle-word-1165-count.jsonl\n", + "Generated JSONL file with - 1320 max words, 50 samples - at ./dataset/gen-word-1320-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1035 max words - at ./dataset/shuffle-word-1035-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 975 max words - at ./dataset/shuffle-word-975-count.jsonl\n", + "Generated JSONL file with - 1280 max words, 50 samples - at ./dataset/gen-word-1280-count.jsonl\n", + "Generated JSONL file with - 1350 max words, 50 samples - at ./dataset/gen-word-1350-count.jsonl\n", + "Generated a single JSONL file with 102 samples (50 token repeat) - 1325 max words - at ./dataset/shuffle-word-1325-count.jsonl\n", + "Generated JSONL file with - 1225 max words, 50 samples - at ./dataset/gen-word-1225-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1435 max words - at ./dataset/shuffle-word-1435-count.jsonl\n", + "Generated JSONL file with - 1440 max words, 50 samples - at ./dataset/gen-word-1440-count.jsonl\n", + "Generated JSONL file with - 1310 max words, 50 samples - at ./dataset/gen-word-1310-count.jsonl\n", + "Generated JSONL file with - 1075 max words, 50 samples - at ./dataset/gen-word-1075-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", + "Generated JSONL file with - 1235 max words, 50 samples - at ./dataset/gen-word-1235-count.jsonl\n", + "Generated a single JSONL file with 119 samples (50 token repeat) - 1245 max words - at ./dataset/shuffle-word-1245-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", + "Generated JSONL file with - 1475 max words, 50 samples - at ./dataset/gen-word-1475-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1050 max words - at 
./dataset/shuffle-word-1050-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", + "Generated JSONL file with - 1215 max words, 50 samples - at ./dataset/gen-word-1215-count.jsonl\n", + "Generated JSONL file with - 1470 max words, 50 samples - at ./dataset/gen-word-1470-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1455 max words - at ./dataset/shuffle-word-1455-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", + "Generated a single JSONL file with 121 samples (50 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1095 max words - at ./dataset/shuffle-word-1095-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1175 max words - at ./dataset/shuffle-word-1175-count.jsonl\n", + "Generated a single JSONL file with 127 samples (50 token repeat) - 1205 max words - at ./dataset/shuffle-word-1205-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1355 max words - at ./dataset/shuffle-word-1355-count.jsonl\n", + "Generated JSONL file with - 1435 max words, 50 samples - at ./dataset/gen-word-1435-count.jsonl\n", + "Generated JSONL file with - 1390 max words, 50 samples - at ./dataset/gen-word-1390-count.jsonl\n", + "Generated a single JSONL file with 102 samples (50 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 985 max words - at ./dataset/shuffle-word-985-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", + "Generated JSONL file with - 1395 max words, 50 samples - at ./dataset/gen-word-1395-count.jsonl\n", + "Generated a single JSONL file with 120 samples (50 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 995 max words - at ./dataset/shuffle-word-995-count.jsonl\n", + "Generated JSONL file with - 1510 max words, 100 samples - at ./dataset/gen-word-1510-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1465 max words - at ./dataset/shuffle-word-1465-count.jsonl\n", + "Generated a single JSONL file with 123 samples (50 token repeat) - 1295 max words - at ./dataset/shuffle-word-1295-count.jsonl\n", + "Generated JSONL file with - 1330 max words, 50 samples - at ./dataset/gen-word-1330-count.jsonl\n", + "Generated JSONL file with - 1675 max words, 100 samples - at ./dataset/gen-word-1675-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1145 max words - at ./dataset/shuffle-word-1145-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", +
"Generated a single JSONL file with 200 samples (100 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n", + "Generated JSONL file with - 1420 max words, 50 samples - at ./dataset/gen-word-1420-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1045 max words - at ./dataset/shuffle-word-1045-count.jsonl\n", + "Generated JSONL file with - 1600 max words, 100 samples - at ./dataset/gen-word-1600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1445 max words - at ./dataset/shuffle-word-1445-count.jsonl\n", + "Generated JSONL file with - 1460 max words, 50 samples - at ./dataset/gen-word-1460-count.jsonl\n", + "Generated a single JSONL file with 101 samples (50 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", + "Generated JSONL file with - 1465 max words, 50 samples - at ./dataset/gen-word-1465-count.jsonl\n", + "Generated JSONL file with - 1760 max words, 100 samples - at ./dataset/gen-word-1760-count.jsonl\n", + "Generated JSONL file with - 1335 max words, 50 samples - at ./dataset/gen-word-1335-count.jsonl\n", + "Generated JSONL file with - 1360 max words, 50 samples - at ./dataset/gen-word-1360-count.jsonl\n", + "Generated JSONL file with - 1340 max words, 50 samples - at ./dataset/gen-word-1340-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1495 max words - at ./dataset/shuffle-word-1495-count.jsonl\n", + "Generated JSONL file with - 1410 max words, 50 samples - at ./dataset/gen-word-1410-count.jsonl\n", + "Generated JSONL file with - 1455 max words, 50 samples - at ./dataset/gen-word-1455-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 1535 max words - at ./dataset/shuffle-word-1535-count.jsonl\n", + "Generated JSONL file with - 1425 max words, 50 samples - at ./dataset/gen-word-1425-count.jsonl\n", + "Generated JSONL file with - 1560 max words, 100 samples - at ./dataset/gen-word-1560-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1475 max words - at ./dataset/shuffle-word-1475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1525 max words - at ./dataset/shuffle-word-1525-count.jsonlGenerated a single JSONL file with 124 samples (50 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", + "\n", + "Generated JSONL file with - 1550 max words, 100 samples - at ./dataset/gen-word-1550-count.jsonl\n", + "\n", + "Generated JSONL file with - 1605 max words, 100 samples - at ./dataset/gen-word-1605-count.jsonl\n", + "Generated a single JSONL file with 101 samples (50 token repeat) - 1365 max words - at ./dataset/shuffle-word-1365-count.jsonl\n", + "Generated JSONL file with - 1645 max words, 100 samples - at 
./dataset/gen-word-1645-count.jsonl\n", + "Generated JSONL file with - 1525 max words, 100 samples - at ./dataset/gen-word-1525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1405 max words - at ./dataset/shuffle-word-1405-count.jsonl\n", + "Generated a single JSONL file with 102 samples (50 token repeat) - 1385 max words - at ./dataset/shuffle-word-1385-count.jsonl\n", + "Generated JSONL file with - 1750 max words, 100 samples - at ./dataset/gen-word-1750-count.jsonl\n", + "Generated JSONL file with - 1635 max words, 100 samples - at ./dataset/gen-word-1635-count.jsonl\n", + "Generated JSONL file with - 1490 max words, 50 samples - at ./dataset/gen-word-1490-count.jsonl\n", + "Generated JSONL file with - 1500 max words, 50 samples - at ./dataset/gen-word-1500-count.jsonl\n", + "Generated a single JSONL file with 127 samples (50 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", + "Generated JSONL file with - 1285 max words, 50 samples - at ./dataset/gen-word-1285-count.jsonl\n", + "Generated JSONL file with - 1610 max words, 100 samples - at ./dataset/gen-word-1610-count.jsonl\n", + "Generated JSONL file with - 1655 max words, 100 samples - at ./dataset/gen-word-1655-count.jsonl\n", + "Generated a single JSONL file with 15669 samples (500 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", + "Generated a single JSONL file with 129 samples (50 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1415 max words - at ./dataset/shuffle-word-1415-count.jsonl\n", + "Generated JSONL file with - 1710 max words, 100 samples - at ./dataset/gen-word-1710-count.jsonl\n", + "Generated JSONL file with - 1505 max words, 100 samples - at ./dataset/gen-word-1505-count.jsonl\n", + "Generated a single JSONL file with 17680 samples (500 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", + "Generated JSONL file with - 1295 max words, 50 samples - at ./dataset/gen-word-1295-count.jsonl\n", + "Generated JSONL file with - 1260 max words, 50 samples - at ./dataset/gen-word-1260-count.jsonl\n", + "Generated JSONL file with - 1540 max words, 100 samples - at ./dataset/gen-word-1540-count.jsonl\n", + "Generated a single JSONL file with 102 samples (50 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", + "Generated JSONL file with - 1480 max words, 50 samples - at ./dataset/gen-word-1480-count.jsonl\n", + "Generated a single JSONL file with 150 samples (50 token repeat) - 1155 max words - at ./dataset/shuffle-word-1155-count.jsonl\n", + "Generated JSONL file with - 1700 max words, 100 samples - at ./dataset/gen-word-1700-count.jsonl\n", + "Generated JSONL file with - 1400 max words, 50 samples - at ./dataset/gen-word-1400-count.jsonl\n", + "Generated JSONL file with - 1450 max words, 50 samples - at ./dataset/gen-word-1450-count.jsonl\n", + "Generated JSONL file with - 1715 max words, 100 samples - at ./dataset/gen-word-1715-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1425 max words - at ./dataset/shuffle-word-1425-count.jsonl\n", + "Generated JSONL file with - 1665 max words, 100 samples - at ./dataset/gen-word-1665-count.jsonl\n", + "Generated JSONL file with - 1495 max words, 50 samples - at ./dataset/gen-word-1495-count.jsonl\n", + "Generated JSONL file with - 1585 max words, 100 samples - at ./dataset/gen-word-1585-count.jsonl\n", + 
"Generated JSONL file with - 1325 max words, 50 samples - at ./dataset/gen-word-1325-count.jsonl\n", + "Generated JSONL file with - 1755 max words, 100 samples - at ./dataset/gen-word-1755-count.jsonl\n", + "Generated JSONL file with - 1530 max words, 100 samples - at ./dataset/gen-word-1530-count.jsonl\n", + "Generated JSONL file with - 1650 max words, 100 samples - at ./dataset/gen-word-1650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", + "Generated JSONL file with - 1720 max words, 100 samples - at ./dataset/gen-word-1720-count.jsonl\n", + "Generated JSONL file with - 1740 max words, 100 samples - at ./dataset/gen-word-1740-count.jsonl\n", + "Generated JSONL file with - 2050 max words, 100 samples - at ./dataset/gen-word-2050-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n", + "Generated JSONL file with - 1630 max words, 100 samples - at ./dataset/gen-word-1630-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1550 max words - at ./dataset/shuffle-word-1550-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1695 max words - at ./dataset/shuffle-word-1695-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1555 max words - at ./dataset/shuffle-word-1555-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1865 max words - at ./dataset/shuffle-word-1865-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2225 max words - at ./dataset/shuffle-word-2225-count.jsonl\n", - "Generated JSONL file with - 1455 max words, 150 samples - at ./dataset/gen-word-1455-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 935 max words - at ./dataset/shuffle-word-935-count.jsonl\n", - "Generated JSONL file with - 2225 max words, 125 samples - at ./dataset/gen-word-2225-count.jsonl\n", + "Generated JSONL file with - 1580 max words, 100 samples - at ./dataset/gen-word-1580-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonl\n", + "Generated JSONL file with - 1925 max words, 100 samples - at ./dataset/gen-word-1925-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1725 max words - at ./dataset/shuffle-word-1725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", + "Generated JSONL file with - 1845 max words, 100 samples - at ./dataset/gen-word-1845-count.jsonl\n", + "Generated a single JSONL file with 124 samples (50 token repeat) - 1215 max words - at ./dataset/shuffle-word-1215-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1755 max words - at ./dataset/shuffle-word-1755-count.jsonl\n", + "Generated a single JSONL file with 200 samples 
(100 token repeat) - 2135 max words - at ./dataset/shuffle-word-2135-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1650 max words - at ./dataset/shuffle-word-1650-count.jsonl\n", + "Generated a single JSONL file with 118 samples (50 token repeat) - 1275 max words - at ./dataset/shuffle-word-1275-count.jsonl\n", + "Generated JSONL file with - 1730 max words, 100 samples - at ./dataset/gen-word-1730-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1605 max words - at ./dataset/shuffle-word-1605-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1925 max words - at ./dataset/shuffle-word-1925-count.jsonl\n", - "Generated a single JSONL file with 913 samples (100 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", - "Generated JSONL file with - 910 max words, 150 samples - at ./dataset/gen-word-910-count.jsonl\n", - "Generated a single JSONL file with 318 samples (100 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", - "Generated a single JSONL file with 198 samples (100 token repeat) - 2325 max words - at ./dataset/shuffle-word-2325-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1795 max words - at ./dataset/shuffle-word-1795-count.jsonl\n", - "Generated JSONL file with - 1550 max words, 150 samples - at ./dataset/gen-word-1550-count.jsonl\n", + "Generated JSONL file with - 1915 max words, 100 samples - at ./dataset/gen-word-1915-count.jsonl\n", + "Generated a single JSONL file with 130 samples (50 token repeat) - 1225 max words - at ./dataset/shuffle-word-1225-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1545 max words - at ./dataset/shuffle-word-1545-count.jsonl\n", + "Generated JSONL file with - 1445 max words, 50 samples - at ./dataset/gen-word-1445-count.jsonl\n", + "Generated JSONL file with - 1790 max words, 100 samples - at ./dataset/gen-word-1790-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n", + "Generated a single JSONL file with 133 samples (50 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", + "Generated JSONL file with - 1695 max words, 100 samples - at ./dataset/gen-word-1695-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1505 max words - at ./dataset/shuffle-word-1505-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1565 max words - at ./dataset/shuffle-word-1565-count.jsonl\n", + "Generated JSONL file with - 1895 max words, 100 samples - at ./dataset/gen-word-1895-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n", + "Generated JSONL file with - 1800 max words, 100 samples - at ./dataset/gen-word-1800-count.jsonl\n", + "Generated JSONL file with - 1885 max words, 100 samples - at ./dataset/gen-word-1885-count.jsonl\n", + "Generated a single JSONL file with 200
samples (100 token repeat) - 2235 max words - at ./dataset/shuffle-word-2235-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n", + "Generated JSONL file with - 1780 max words, 100 samples - at ./dataset/gen-word-1780-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1395 max words - at ./dataset/shuffle-word-1395-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1695 max words - at ./dataset/shuffle-word-1695-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1585 max words - at ./dataset/shuffle-word-1585-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n", + "Generated a single JSONL file with 148 samples (50 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", + "Generated a single JSONL file with 101 samples (50 token repeat) - 1305 max words - at ./dataset/shuffle-word-1305-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1335 max words - at ./dataset/shuffle-word-1335-count.jsonl\n", + "Generated JSONL file with - 1785 max words, 100 samples - at ./dataset/gen-word-1785-count.jsonl\n", + "Generated JSONL file with - 1775 max words, 100 samples - at ./dataset/gen-word-1775-count.jsonl\n", + "Generated JSONL file with - 2055 max words, 100 samples - at ./dataset/gen-word-2055-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1895 max words - at ./dataset/shuffle-word-1895-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2155 max words - at ./dataset/shuffle-word-2155-count.jsonl\n", + "Generated JSONL file with - 1950 max words, 100 samples - at ./dataset/gen-word-1950-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1635 max words - at ./dataset/shuffle-word-1635-count.jsonl\n", + "Generated JSONL file with - 1570 max words, 100 samples - at ./dataset/gen-word-1570-count.jsonl\n", + "Generated JSONL file with - 2150 max words, 100 samples - at ./dataset/gen-word-2150-count.jsonl\n", + "Generated JSONL file with - 1680 max words, 100 samples - at ./dataset/gen-word-1680-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n", + "Generated JSONL file with - 1620 max words, 100 samples - at ./dataset/gen-word-1620-count.jsonl\n", + "Generated a single JSONL file with 121 samples (50 token repeat) - 1255 max words - at ./dataset/shuffle-word-1255-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n", + "Generated JSONL file with - 1590 max words, 100 samples -
at ./dataset/gen-word-1590-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n", + "Generated JSONL file with - 2315 max words, 100 samples - at ./dataset/gen-word-2315-count.jsonl\n", + "Generated JSONL file with - 1865 max words, 100 samples - at ./dataset/gen-word-1865-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2045 max words - at ./dataset/shuffle-word-2045-count.jsonl\n", + "Generated JSONL file with - 1820 max words, 100 samples - at ./dataset/gen-word-1820-count.jsonl\n", + "Generated JSONL file with - 2095 max words, 100 samples - at ./dataset/gen-word-2095-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1775 max words - at ./dataset/shuffle-word-1775-count.jsonl\n", + "Generated JSONL file with - 1725 max words, 100 samples - at ./dataset/gen-word-1725-count.jsonl\n", + "Generated JSONL file with - 1935 max words, 100 samples - at ./dataset/gen-word-1935-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n", + "Generated JSONL file with - 1685 max words, 100 samples - at ./dataset/gen-word-1685-count.jsonl\n", + "Generated JSONL file with - 1690 max words, 100 samples - at ./dataset/gen-word-1690-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1685 max words - at ./dataset/shuffle-word-1685-count.jsonl\n", + "Generated JSONL file with - 1415 max words, 50 samples - at ./dataset/gen-word-1415-count.jsonl\n", + "Generated JSONL file with - 2185 max words, 100 samples - at ./dataset/gen-word-2185-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n", + "Generated JSONL file with - 2100 max words, 100 samples - at ./dataset/gen-word-2100-count.jsonl\n", + "Generated JSONL file with - 1840 max words, 100 samples - at ./dataset/gen-word-1840-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n", + "Generated JSONL file with - 2105 max words, 100 samples - at ./dataset/gen-word-2105-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n", + "Generated JSONL file with - 2010 max words, 100 samples - at ./dataset/gen-word-2010-count.jsonl\n", + "Generated JSONL file with - 2145 max words, 100 samples - at ./dataset/gen-word-2145-count.jsonl\n", + "Generated JSONL file with - 1870 max words, 100 samples - at ./dataset/gen-word-1870-count.jsonl\n", + "Generated JSONL file with - 1430 max words, 50 samples - at ./dataset/gen-word-1430-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2035 max words - at ./dataset/shuffle-word-2035-count.jsonl\n", + "Generated JSONL file with - 1520 max words, 100 samples - at ./dataset/gen-word-1520-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n", + "Generated JSONL file with - 1965 max words, 100 samples - at ./dataset/gen-word-1965-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1645 max words - at ./dataset/shuffle-word-1645-count.jsonl\n", + "Generated JSONL file with - 2215 max words, 100
samples - at ./dataset/gen-word-2215-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1825 max words - at ./dataset/shuffle-word-1825-count.jsonl\n", + "Generated JSONL file with - 1660 max words, 100 samples - at ./dataset/gen-word-1660-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2225 max words - at ./dataset/shuffle-word-2225-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1905 max words - at ./dataset/shuffle-word-1905-count.jsonl\n", + "Generated JSONL file with - 1735 max words, 100 samples - at ./dataset/gen-word-1735-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n", + "Generated JSONL file with - 1905 max words, 100 samples - at ./dataset/gen-word-1905-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonl\n", + "Generated JSONL file with - 2030 max words, 100 samples - at ./dataset/gen-word-2030-count.jsonl\n", + "Generated JSONL file with - 1880 max words, 100 samples - at ./dataset/gen-word-1880-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1910 max words - at ./dataset/shuffle-word-1910-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n", + "Generated JSONL file with - 1670 max words, 100 samples - at ./dataset/gen-word-1670-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n", + "Generated JSONL file with - 1945 max words, 100 samples - at ./dataset/gen-word-1945-count.jsonl\n", + "Generated JSONL file with - 1855 max words, 100 samples - at ./dataset/gen-word-1855-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2115 max words - at ./dataset/shuffle-word-2115-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1745 max words - at ./dataset/shuffle-word-1745-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonl\n", + "Generated a single JSONL file with 199 samples (100 token repeat) - 2385 max words - at ./dataset/shuffle-word-2385-count.jsonl\n", + "Generated JSONL file with - 1805 max words, 100 samples - at ./dataset/gen-word-1805-count.jsonl\n", + "Generated JSONL file with - 1835 max words, 100 samples - at ./dataset/gen-word-1835-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2295 max words - at ./dataset/shuffle-word-2295-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1930 max words - at
./dataset/shuffle-word-1930-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2275 max words - at ./dataset/shuffle-word-2275-count.jsonl\n", + "Generated a single JSONL file with 181 samples (100 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n", + "Generated a single JSONL file with 188 samples (100 token repeat) - 2465 max words - at ./dataset/shuffle-word-2465-count.jsonl\n", + "Generated JSONL file with - 1900 max words, 100 samples - at ./dataset/gen-word-1900-count.jsonl\n", + "Generated JSONL file with - 1615 max words, 100 samples - at ./dataset/gen-word-1615-count.jsonl\n", + "Generated JSONL file with - 1930 max words, 100 samples - at ./dataset/gen-word-1930-count.jsonl\n", + "Generated JSONL file with - 2495 max words, 100 samples - at ./dataset/gen-word-2495-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1715 max words - at ./dataset/shuffle-word-1715-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1870 max words - at ./dataset/shuffle-word-1870-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1995 max words - at ./dataset/shuffle-word-1995-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1865 max words - at ./dataset/shuffle-word-1865-count.jsonl\n", + "Generated JSONL file with - 2135 max words, 100 samples - at ./dataset/gen-word-2135-count.jsonl\n", + "Generated a single JSONL file with 100 samples (50 token repeat) - 1485 max words - at ./dataset/shuffle-word-1485-count.jsonl\n", + "Generated JSONL file with - 1575 max words, 100 samples - at ./dataset/gen-word-1575-count.jsonl\n", + "Generated JSONL file with - 1535 max words, 100 samples - at ./dataset/gen-word-1535-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n", + "Generated JSONL file with - 2120 max words, 100 samples - at ./dataset/gen-word-2120-count.jsonl\n", + "Generated a single JSONL file with 196 samples (100 token repeat) - 2335 max words - at ./dataset/shuffle-word-2335-count.jsonl\n", + "Generated JSONL file with - 1515 max words, 100 samples - at ./dataset/gen-word-1515-count.jsonl\n", + "Generated JSONL file with - 2265 max words, 100 samples - at ./dataset/gen-word-2265-count.jsonl\n", + "Generated a single JSONL file with 151 samples (100 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2125 max words - at ./dataset/shuffle-word-2125-count.jsonl\n", + "Generated JSONL file with - 2005 max words, 100 samples - at ./dataset/gen-word-2005-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1965 max words - at ./dataset/shuffle-word-1965-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2065 max words - at ./dataset/shuffle-word-2065-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) -
2095 max words - at ./dataset/shuffle-word-2095-count.jsonl\n", + "Generated JSONL file with - 2060 max words, 100 samples - at ./dataset/gen-word-2060-count.jsonl\n", + "Generated JSONL file with - 1975 max words, 100 samples - at ./dataset/gen-word-1975-count.jsonl\n", + "Generated JSONL file with - 2285 max words, 100 samples - at ./dataset/gen-word-2285-count.jsonl\n", + "Generated JSONL file with - 2170 max words, 100 samples - at ./dataset/gen-word-2170-count.jsonl\n", + "Generated JSONL file with - 1595 max words, 100 samples - at ./dataset/gen-word-1595-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1985 max words - at ./dataset/shuffle-word-1985-count.jsonl\n", + "Generated a single JSONL file with 176 samples (100 token repeat) - 2500 max words - at ./dataset/shuffle-word-2500-count.jsonl\n", + "Generated JSONL file with - 2175 max words, 100 samples - at ./dataset/gen-word-2175-count.jsonl\n", + "Generated JSONL file with - 2225 max words, 100 samples - at ./dataset/gen-word-2225-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2205 max words - at ./dataset/shuffle-word-2205-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2075 max words - at ./dataset/shuffle-word-2075-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n", + "Generated JSONL file with - 2230 max words, 100 samples - at ./dataset/gen-word-2230-count.jsonl\n", + "Generated JSONL file with - 1810 max words, 100 samples - at ./dataset/gen-word-1810-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n", + "Generated a single JSONL file with 197 samples (100 token repeat) - 2365 max words - at ./dataset/shuffle-word-2365-count.jsonl\n", + "Generated a single JSONL file with 115 samples (100 token repeat) - 2665 max words - at ./dataset/shuffle-word-2665-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonl\n", + "Generated JSONL file with - 2065 max words, 100 samples - at ./dataset/gen-word-2065-count.jsonl\n", + "Generated JSONL file with - 2380 max words, 100 samples - at ./dataset/gen-word-2380-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n", + "Generated JSONL file with - 1795 max words, 100 samples - at ./dataset/gen-word-1795-count.jsonl\n", + "Generated JSONL file with - 1565 max words, 100 samples - at ./dataset/gen-word-1565-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1935 max words - at ./dataset/shuffle-word-1935-count.jsonl\n", + "Generated JSONL file with - 2515 max words, 100 samples - at ./dataset/gen-word-2515-count.jsonl\n", + "Generated a single
JSONL file with 199 samples (100 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n", + "Generated JSONL file with - 1815 max words, 100 samples - at ./dataset/gen-word-1815-count.jsonl\n", + "Generated JSONL file with - 2245 max words, 100 samples - at ./dataset/gen-word-2245-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2280 max words - at ./dataset/shuffle-word-2280-count.jsonl\n", + "Generated JSONL file with - 2305 max words, 100 samples - at ./dataset/gen-word-2305-count.jsonl\n", + "Generated JSONL file with - 2350 max words, 100 samples - at ./dataset/gen-word-2350-count.jsonl\n", + "Generated JSONL file with - 1640 max words, 100 samples - at ./dataset/gen-word-1640-count.jsonl\n", + "Generated JSONL file with - 2340 max words, 100 samples - at ./dataset/gen-word-2340-count.jsonl\n", + "Generated a single JSONL file with 184 samples (100 token repeat) - 2475 max words - at ./dataset/shuffle-word-2475-count.jsonl\n", + "Generated a single JSONL file with 199 samples (100 token repeat) - 2345 max words - at ./dataset/shuffle-word-2345-count.jsonl\n", + "Generated JSONL file with - 2295 max words, 100 samples - at ./dataset/gen-word-2295-count.jsonl\n", + "Generated JSONL file with - 1705 max words, 100 samples - at ./dataset/gen-word-1705-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1885 max words - at ./dataset/shuffle-word-1885-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n", + "Generated a single JSONL file with 199 samples (100 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n", + "Generated JSONL file with - 2195 max words, 100 samples - at ./dataset/gen-word-2195-count.jsonl\n", + "Generated JSONL file with - 2255 max words, 100 samples - at ./dataset/gen-word-2255-count.jsonl\n", + "Generated JSONL file with - 2260 max words, 100 samples - at ./dataset/gen-word-2260-count.jsonl\n", + "Generated a single JSONL file with 192 samples (100 token repeat) - 2425 max words - at ./dataset/shuffle-word-2425-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2195 max words - at ./dataset/shuffle-word-2195-count.jsonl\n", + "Generated JSONL file with - 1555 max words, 100 samples - at ./dataset/gen-word-1555-count.jsonl\n", + "Generated JSONL file with - 2545 max words, 100 samples - at ./dataset/gen-word-2545-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1625 max words - at ./dataset/shuffle-word-1625-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1595 max words - at ./dataset/shuffle-word-1595-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n", + "Generated JSONL file with - 2045 max words, 100 samples - at ./dataset/gen-word-2045-count.jsonl\n", + "Generated JSONL file with - 1910 max words, 100 samples - at ./dataset/gen-word-1910-count.jsonl\n", + "Generated JSONL file with - 2400 max words, 100 samples - at ./dataset/gen-word-2400-count.jsonl\n", + "Generated JSONL file with - 2025 max words, 100 samples - at ./dataset/gen-word-2025-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1925 max words - at ./dataset/shuffle-word-1925-count.jsonl\n", + "Generated a single
JSONL file with 184 samples (100 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n", + "Generated JSONL file with - 2695 max words, 100 samples - at ./dataset/gen-word-2695-count.jsonl\n", + "Generated JSONL file with - 1940 max words, 100 samples - at ./dataset/gen-word-1940-count.jsonl\n", + "Generated JSONL file with - 1850 max words, 100 samples - at ./dataset/gen-word-1850-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2175 max words - at ./dataset/shuffle-word-2175-count.jsonl\n", + "Generated JSONL file with - 1875 max words, 100 samples - at ./dataset/gen-word-1875-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n", + "Generated JSONL file with - 1765 max words, 100 samples - at ./dataset/gen-word-1765-count.jsonl\n", + "Generated JSONL file with - 1745 max words, 100 samples - at ./dataset/gen-word-1745-count.jsonl\n", + "Generated a single JSONL file with 190 samples (100 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1705 max words - at ./dataset/shuffle-word-1705-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2085 max words - at ./dataset/shuffle-word-2085-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1805 max words - at ./dataset/shuffle-word-1805-count.jsonl\n", + "Generated JSONL file with - 2585 max words, 100 samples - at ./dataset/gen-word-2585-count.jsonl\n", + "Generated JSONL file with - 1545 max words, 100 samples - at ./dataset/gen-word-1545-count.jsonl\n", + "Generated a single JSONL file with 140 samples (100 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n", + "Generated JSONL file with - 1625 max words, 100 samples - at ./dataset/gen-word-1625-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2090 max words - at ./dataset/shuffle-word-2090-count.jsonl\n", + "Generated a single JSONL file with 184 samples (100 token repeat) - 2455 max words - at ./dataset/shuffle-word-2455-count.jsonl\n", + "Generated JSONL file with - 2580 max words, 100 samples - at ./dataset/gen-word-2580-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n", + "Generated JSONL file with - 2040 max words, 100 samples - at ./dataset/gen-word-2040-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n", + "Generated JSONL file with - 2540 max words, 100 samples - at ./dataset/gen-word-2540-count.jsonl\n", + "Generated JSONL file with - 2210 max words, 100 samples - at ./dataset/gen-word-2210-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1665 max words - at ./dataset/shuffle-word-1665-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token
repeat) - 1655 max words - at ./dataset/shuffle-word-1655-count.jsonl\n", + "Generated a single JSONL file with 104 samples (100 token repeat) - 2735 max words - at ./dataset/shuffle-word-2735-count.jsonl\n", + "Generated JSONL file with - 1920 max words, 100 samples - at ./dataset/gen-word-1920-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1675 max words - at ./dataset/shuffle-word-1675-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1930 max words - at ./dataset/shuffle-word-1930-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2075 max words - at ./dataset/shuffle-word-2075-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1815 max words - at ./dataset/shuffle-word-1815-count.jsonl\n", - "Generated JSONL file with - 2040 max words, 125 samples - at ./dataset/gen-word-2040-count.jsonl\n", - "Generated JSONL file with - 1925 max words, 150 samples - at ./dataset/gen-word-1925-count.jsonl\n", - "Generated a single JSONL file with 2056 samples (100 token repeat) - 105 max words - at ./dataset/shuffle-word-105-count.jsonl\n", - "Generated a single JSONL file with 317 samples (100 token repeat) - 830 max words - at ./dataset/shuffle-word-830-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2090 max words - at ./dataset/shuffle-word-2090-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n", - "Generated JSONL file with - 2195 max words, 125 samples - at ./dataset/gen-word-2195-count.jsonl\n", - "Generated JSONL file with - 1020 max words, 150 samples - at ./dataset/gen-word-1020-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1915 max words - at ./dataset/shuffle-word-1915-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1825 max words - at ./dataset/shuffle-word-1825-count.jsonl\n", - "Generated JSONL file with - 1910 max words, 150 samples - at ./dataset/gen-word-1910-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1065 max words - at ./dataset/shuffle-word-1065-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", - "Generated a single JSONL file with 1347 samples (100 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n", - "Generated a single JSONL file with 914 samples (100 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", - "Generated JSONL file with - 2090 max words, 125 samples - at ./dataset/gen-word-2090-count.jsonl\n", - "Generated JSONL file with - 1915 max words, 150 samples - at ./dataset/gen-word-1915-count.jsonl\n", - "Generated a single JSONL file with 198 samples (100 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2315 max words - at 
./dataset/shuffle-word-2315-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2385 max words - at ./dataset/shuffle-word-2385-count.jsonl\n", - "Generated JSONL file with - 2320 max words, 125 samples - at ./dataset/gen-word-2320-count.jsonl\n", - "Generated a single JSONL file with 1416 samples (100 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", - "Generated JSONL file with - 1860 max words, 150 samples - at ./dataset/gen-word-1860-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2395 max words - at ./dataset/shuffle-word-2395-count.jsonl\n", - "Generated JSONL file with - 2095 max words, 125 samples - at ./dataset/gen-word-2095-count.jsonl\n", - "Generated a single JSONL file with 110 samples (100 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n", - "Generated a single JSONL file with 186 samples (100 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n", - "Generated JSONL file with - 2310 max words, 125 samples - at ./dataset/gen-word-2310-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1845 max words - at ./dataset/shuffle-word-1845-count.jsonl\n", - "Generated JSONL file with - 2260 max words, 125 samples - at ./dataset/gen-word-2260-count.jsonl\n", - "Generated JSONL file with - 1800 max words, 150 samples - at ./dataset/gen-word-1800-count.jsonl\n", - "Generated a single JSONL file with 400 samples (100 token repeat) - 785 max words - at ./dataset/shuffle-word-785-count.jsonl\n", - "Generated JSONL file with - 2075 max words, 125 samples - at ./dataset/gen-word-2075-count.jsonl\n", - "Generated a single JSONL file with 590 samples (100 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", - "Generated JSONL file with - 1320 max words, 150 samples - at ./dataset/gen-word-1320-count.jsonl\n", - "Generated a single JSONL file with 115 samples (100 token repeat) - 2675 max words - at ./dataset/shuffle-word-2675-count.jsonl\n", - "Generated JSONL file with - 1795 max words, 150 samples - at ./dataset/gen-word-1795-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", - "Generated a single JSONL file with 106 samples (100 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n", - "Generated JSONL file with - 545 max words, 150 samples - at ./dataset/gen-word-545-count.jsonl\n", - "Generated a single JSONL file with 13064 samples (100 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1685 max words - at ./dataset/shuffle-word-1685-count.jsonl\n", - "Generated JSONL file with - 1985 max words, 150 samples - at ./dataset/gen-word-1985-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2745 max words - at ./dataset/shuffle-word-2745-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2035 max words - at ./dataset/shuffle-word-2035-count.jsonl\n", - "Generated a single JSONL file with 185 samples (100 token repeat) - 2495 max 
words - at ./dataset/shuffle-word-2495-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n", - "Generated JSONL file with - 2405 max words, 125 samples - at ./dataset/gen-word-2405-count.jsonl\n", - "Generated JSONL file with - 2675 max words, 125 samples - at ./dataset/gen-word-2675-count.jsonl\n", - "Generated JSONL file with - 1205 max words, 150 samples - at ./dataset/gen-word-1205-count.jsonl\n", - "Generated JSONL file with - 2580 max words, 125 samples - at ./dataset/gen-word-2580-count.jsonl\n", - "Generated a single JSONL file with 1908 samples (100 token repeat) - 115 max words - at ./dataset/shuffle-word-115-count.jsonl\n", - "Generated JSONL file with - 2695 max words, 125 samples - at ./dataset/gen-word-2695-count.jsonl\n", - "Generated a single JSONL file with 1359 samples (100 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1905 max words - at ./dataset/shuffle-word-1905-count.jsonl\n", - "Generated JSONL file with - 2630 max words, 125 samples - at ./dataset/gen-word-2630-count.jsonl\n", - "Generated a single JSONL file with 198 samples (100 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n", - "Generated a single JSONL file with 1585 samples (100 token repeat) - 145 max words - at ./dataset/shuffle-word-145-count.jsonl\n", - "Generated a single JSONL file with 702 samples (100 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", - "Generated JSONL file with - 2620 max words, 125 samples - at ./dataset/gen-word-2620-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1485 max words - at ./dataset/shuffle-word-1485-count.jsonl\n", - "Generated a single JSONL file with 114 samples (100 token repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n", - "Generated JSONL file with - 2700 max words, 125 samples - at ./dataset/gen-word-2700-count.jsonl\n", - "Generated a single JSONL file with 700 samples (100 token repeat) - 355 max words - at ./dataset/shuffle-word-355-count.jsonl\n", - "Generated a single JSONL file with 180 samples (100 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n", - "Generated a single JSONL file with 585 samples (100 token repeat) - 495 max words - at ./dataset/shuffle-word-495-count.jsonl\n", - "Generated JSONL file with - 2690 max words, 125 samples - at ./dataset/gen-word-2690-count.jsonl\n", + "Generated JSONL file with - 2395 max words, 100 samples - at ./dataset/gen-word-2395-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1855 max words - at ./dataset/shuffle-word-1855-count.jsonl\n", + "Generated JSONL file with - 2410 max words, 100 samples - at ./dataset/gen-word-2410-count.jsonl\n", + "Generated JSONL file with - 1860 max words, 100 samples - at ./dataset/gen-word-1860-count.jsonl\n", + "Generated JSONL file with - 1995 max words, 100 samples - at ./dataset/gen-word-1995-count.jsonl\n", + "Generated JSONL file with - 1955 max words, 100 samples - at ./dataset/gen-word-1955-count.jsonl\n", + "Generated JSONL file with - 2665 max words, 100 samples - at ./dataset/gen-word-2665-count.jsonl\n", + "Generated JSONL file with - 2130 max words, 100 samples - at ./dataset/gen-word-2130-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2140 max words - 
at ./dataset/shuffle-word-2140-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1575 max words - at ./dataset/shuffle-word-1575-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1975 max words - at ./dataset/shuffle-word-1975-count.jsonl\n", + "Generated JSONL file with - 2610 max words, 100 samples - at ./dataset/gen-word-2610-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1875 max words - at ./dataset/shuffle-word-1875-count.jsonl\n", + "Generated JSONL file with - 2165 max words, 100 samples - at ./dataset/gen-word-2165-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1735 max words - at ./dataset/shuffle-word-1735-count.jsonl\n", + "Generated JSONL file with - 1830 max words, 100 samples - at ./dataset/gen-word-1830-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1835 max words - at ./dataset/shuffle-word-1835-count.jsonl\n", - "Generated a single JSONL file with 185 samples (100 token repeat) - 2425 max words - at ./dataset/shuffle-word-2425-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2345 max words - at ./dataset/shuffle-word-2345-count.jsonl\n", - "Generated JSONL file with - 2635 max words, 125 samples - at ./dataset/gen-word-2635-count.jsonl\n", - "Generated a single JSONL file with 150 samples (100 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n", - "Generated JSONL file with - 2425 max words, 125 samples - at ./dataset/gen-word-2425-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2355 max words - at ./dataset/shuffle-word-2355-count.jsonl\n", - "Generated a single JSONL file with 117 samples (100 token repeat) - 2695 max words - at ./dataset/shuffle-word-2695-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", - "Generated JSONL file with - 2045 max words, 125 samples - at ./dataset/gen-word-2045-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n", - "Generated JSONL file with - 2380 max words, 125 samples - at ./dataset/gen-word-2380-count.jsonl\n", - "Generated JSONL file with - 2705 max words, 125 samples - at ./dataset/gen-word-2705-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2885 max words - at ./dataset/shuffle-word-2885-count.jsonl\n", - "Generated JSONL file with - 1845 max words, 150 samples - at ./dataset/gen-word-1845-count.jsonl\n", - "Generated JSONL file with - 2825 max words, 125 samples - at ./dataset/gen-word-2825-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2705 max words - at ./dataset/shuffle-word-2705-count.jsonl\n", - "Generated a single JSONL file with 114 samples (100 token repeat) - 2605 max words - at ./dataset/shuffle-word-2605-count.jsonl\n", - "Generated JSONL file with - 645 max 
words, 150 samples - at ./dataset/gen-word-645-count.jsonl\n", - "Generated JSONL file with - 2365 max words, 125 samples - at ./dataset/gen-word-2365-count.jsonl\n", - "Generated JSONL file with - 3090 max words, 100 samples - at ./dataset/gen-word-3090-count.jsonl\n", - "Generated JSONL file with - 2375 max words, 125 samples - at ./dataset/gen-word-2375-count.jsonl\n", - "Generated JSONL file with - 2290 max words, 125 samples - at ./dataset/gen-word-2290-count.jsonl\n", - "Generated a single JSONL file with 124 samples (100 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n", - "Generated a single JSONL file with 122 samples (100 token repeat) - 2685 max words - at ./dataset/shuffle-word-2685-count.jsonl\n", - "Generated a single JSONL file with 148 samples (100 token repeat) - 2575 max words - at ./dataset/shuffle-word-2575-count.jsonl\n", - "Generated JSONL file with - 2325 max words, 125 samples - at ./dataset/gen-word-2325-count.jsonl\n", - "Generated JSONL file with - 3095 max words, 100 samples - at ./dataset/gen-word-3095-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n", - "Generated a single JSONL file with 160 samples (100 token repeat) - 2515 max words - at ./dataset/shuffle-word-2515-count.jsonl\n", - "Generated JSONL file with - 2180 max words, 125 samples - at ./dataset/gen-word-2180-count.jsonl\n", - "Generated JSONL file with - 2395 max words, 125 samples - at ./dataset/gen-word-2395-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1035 max words - at ./dataset/shuffle-word-1035-count.jsonl\n", - "Generated JSONL file with - 2935 max words, 125 samples - at ./dataset/gen-word-2935-count.jsonl\n", - "Generated a single JSONL file with 921 samples (100 token repeat) - 275 max words - at ./dataset/shuffle-word-275-count.jsonl\n", - "Generated a single JSONL file with 156 samples (100 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3145 max words - at ./dataset/shuffle-word-3145-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3120 max words - at ./dataset/shuffle-word-3120-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3185 max words - at ./dataset/shuffle-word-3185-count.jsonl\n", - "Generated a single JSONL file with 106 samples (100 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3130 max words - at ./dataset/shuffle-word-3130-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3080 max words - at ./dataset/shuffle-word-3080-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", - "Generated JSONL file with - 2745 max words, 125 samples - at ./dataset/gen-word-2745-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3155 max words - at ./dataset/shuffle-word-3155-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words - at ./dataset/shuffle-word-3450-count.jsonl\n", - "Generated JSONL file with - 2680 max words, 125 samples - at ./dataset/gen-word-2680-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 
token repeat) - 3515 max words - at ./dataset/shuffle-word-3515-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3520 max words - at ./dataset/shuffle-word-3520-count.jsonl\n", - "Generated a single JSONL file with 1792 samples (100 token repeat) - 125 max words - at ./dataset/shuffle-word-125-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3005 max words - at ./dataset/shuffle-word-3005-count.jsonl\n", - "Generated JSONL file with - 3315 max words, 100 samples - at ./dataset/gen-word-3315-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3095 max words - at ./dataset/shuffle-word-3095-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2825 max words - at ./dataset/shuffle-word-2825-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3115 max words - at ./dataset/shuffle-word-3115-count.jsonl\n", - "Generated JSONL file with - 3160 max words, 100 samples - at ./dataset/gen-word-3160-count.jsonl\n", - "Generated a single JSONL file with 591 samples (100 token repeat) - 435 max words - at ./dataset/shuffle-word-435-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1055 max words - at ./dataset/shuffle-word-1055-count.jsonl\n", - "Generated JSONL file with - 1060 max words, 150 samples - at ./dataset/gen-word-1060-count.jsonl\n", - "Generated a single JSONL file with 104 samples (100 token repeat) - 2755 max words - at ./dataset/shuffle-word-2755-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3565 max words - at ./dataset/shuffle-word-3565-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 max words - at ./dataset/shuffle-word-3350-count.jsonl\n", - "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3180 max words - at ./dataset/shuffle-word-3180-count.jsonl\n", - "Generated JSONL file with - 2760 max words, 125 samples - at ./dataset/gen-word-2760-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3065 max words - at ./dataset/shuffle-word-3065-count.jsonl\n", - "Generated JSONL file with - 2510 max words, 125 samples - at ./dataset/gen-word-2510-count.jsonl\n", - "Generated a single JSONL file with 55890 samples (100 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", - "Generated JSONL file with - 1555 max words, 150 samples - at ./dataset/gen-word-1555-count.jsonl\n", - "Generated JSONL file with - 2235 max words, 125 samples - at ./dataset/gen-word-2235-count.jsonl\n", - "Generated a single JSONL file with 190 samples (100 token repeat) - 2435 max words - at ./dataset/shuffle-word-2435-count.jsonl\n", - "Generated a single JSONL file with 583 samples (100 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", - "Generated JSONL file with - 3010 max words, 100 samples - at ./dataset/gen-word-3010-count.jsonl\n", - "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n", - "Generated a single JSONL file with 251 samples (100 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", - "Generated JSONL file with - 2330 max words, 125 samples - at ./dataset/gen-word-2330-count.jsonl\n", - "Generated JSONL file with - 2920 max 
words, 125 samples - at ./dataset/gen-word-2920-count.jsonl\n", - "Generated JSONL file with - 3515 max words, 100 samples - at ./dataset/gen-word-3515-count.jsonl\n", - "Generated a single JSONL file with 501 samples (100 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", - "Generated JSONL file with - 3495 max words, 100 samples - at ./dataset/gen-word-3495-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3040 max words - at ./dataset/shuffle-word-3040-count.jsonl\n", - "Generated JSONL file with - 1075 max words, 150 samples - at ./dataset/gen-word-1075-count.jsonl\n", - "Generated a single JSONL file with 499 samples (100 token repeat) - 575 max words - at ./dataset/shuffle-word-575-count.jsonl\n", - "Generated JSONL file with - 1180 max words, 150 samples - at ./dataset/gen-word-1180-count.jsonl\n", - "Generated JSONL file with - 3295 max words, 100 samples - at ./dataset/gen-word-3295-count.jsonl\n", - "Generated JSONL file with - 3110 max words, 100 samples - at ./dataset/gen-word-3110-count.jsonl\n", - "Generated JSONL file with - 1170 max words, 150 samples - at ./dataset/gen-word-1170-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", - "Generated JSONL file with - 3430 max words, 100 samples - at ./dataset/gen-word-3430-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3505 max words - at ./dataset/shuffle-word-3505-count.jsonl\n", - "Generated a single JSONL file with 1846 samples (100 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3535 max words - at ./dataset/shuffle-word-3535-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3195 max words - at ./dataset/shuffle-word-3195-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3355 max words - at ./dataset/shuffle-word-3355-count.jsonl\n", - "Generated a single JSONL file with 600 samples (100 token repeat) - 405 max words - at ./dataset/shuffle-word-405-count.jsonl\n", - "Generated a single JSONL file with 702 samples (100 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", - "Generated JSONL file with - 1440 max words, 150 samples - at ./dataset/gen-word-1440-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3560 max words - at ./dataset/shuffle-word-3560-count.jsonl\n", - "Generated a single JSONL file with 500 samples (100 token repeat) - 545 max words - at ./dataset/shuffle-word-545-count.jsonl\n", - "Generated a single JSONL file with 1052 samples (100 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", - "Generated a single JSONL file with 584 samples (100 token repeat) - 485 max words - at ./dataset/shuffle-word-485-count.jsonl\n", - 
"Generated JSONL file with - 3270 max words, 100 samples - at ./dataset/gen-word-3270-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3215 max words - at ./dataset/shuffle-word-3215-count.jsonl\n", - "Generated JSONL file with - 2655 max words, 125 samples - at ./dataset/gen-word-2655-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3230 max words - at ./dataset/shuffle-word-3230-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3280 max words - at ./dataset/shuffle-word-3280-count.jsonl\n", + "Generated JSONL file with - 2080 max words, 100 samples - at ./dataset/gen-word-2080-count.jsonl\n", + "Generated a single JSONL file with 199 samples (100 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n", + "Generated a single JSONL file with 116 samples (100 token repeat) - 2625 max words - at ./dataset/shuffle-word-2625-count.jsonl\n", + "Generated JSONL file with - 1770 max words, 100 samples - at ./dataset/gen-word-1770-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n", "Generated a single JSONL file with 200 samples (100 token repeat) - 1515 max words - at ./dataset/shuffle-word-1515-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", - "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonl\n", - "Generated a single JSONL file with 704 samples (100 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", - "Generated JSONL file with - 3305 max words, 100 samples - at ./dataset/gen-word-3305-count.jsonl\n", - "Generated JSONL file with - 3310 max words, 100 samples - at ./dataset/gen-word-3310-count.jsonl\n", - "Generated a single JSONL file with 7570 samples (100 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", - "Generated JSONL file with - 3115 max words, 100 samples - at ./dataset/gen-word-3115-count.jsonl\n", - "Generated a single JSONL file with 402 samples (100 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", - "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3270 max words - at ./dataset/shuffle-word-3270-count.jsonl\n", - "Generated JSONL file with - 3480 max words, 100 samples - at ./dataset/gen-word-3480-count.jsonl\n", - "Generated a single JSONL file with 587 samples (100 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", - "Generated JSONL file with - 1265 max words, 150 samples - at ./dataset/gen-word-1265-count.jsonl\n", - "Generated JSONL file with - 3445 max words, 100 samples - at ./dataset/gen-word-3445-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3445 max words - at ./dataset/shuffle-word-3445-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3285 max words - at ./dataset/shuffle-word-3285-count.jsonl\n", - "Generated JSONL file with - 3210 max words, 100 samples - at ./dataset/gen-word-3210-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3360 max words - at ./dataset/shuffle-word-3360-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 
token repeat) - 1545 max words - at ./dataset/shuffle-word-1545-count.jsonl\n", - "Generated JSONL file with - 1090 max words, 150 samples - at ./dataset/gen-word-1090-count.jsonl\n", - "Generated a single JSONL file with 8738 samples (100 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", - "Generated JSONL file with - 3260 max words, 100 samples - at ./dataset/gen-word-3260-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3385 max words - at ./dataset/shuffle-word-3385-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n", - "Generated a single JSONL file with 706 samples (100 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3260 max words - at ./dataset/shuffle-word-3260-count.jsonl\n", - "Generated JSONL file with - 640 max words, 150 samples - at ./dataset/gen-word-640-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3595 max words - at ./dataset/shuffle-word-3595-count.jsonl\n", - "Generated JSONL file with - 1085 max words, 150 samples - at ./dataset/gen-word-1085-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1615 max words - at ./dataset/shuffle-word-1615-count.jsonl\n", + "Generated JSONL file with - 2320 max words, 100 samples - at ./dataset/gen-word-2320-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1785 max words - at ./dataset/shuffle-word-1785-count.jsonl\n", + "Generated JSONL file with - 1990 max words, 100 samples - at ./dataset/gen-word-1990-count.jsonl\n", + "Generated a single JSONL file with 152 samples (100 token repeat) - 2565 max words - at ./dataset/shuffle-word-2565-count.jsonl\n", + "Generated JSONL file with - 2510 max words, 100 samples - at ./dataset/gen-word-2510-count.jsonl\n", + "Generated JSONL file with - 1825 max words, 100 samples - at ./dataset/gen-word-1825-count.jsonl\n", + "Generated JSONL file with - 2435 max words, 100 samples - at ./dataset/gen-word-2435-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1940 max words - at ./dataset/shuffle-word-1940-count.jsonl\n", + "Generated JSONL file with - 2035 max words, 100 samples - at ./dataset/gen-word-2035-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1945 max words - at ./dataset/shuffle-word-1945-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2245 max words - at ./dataset/shuffle-word-2245-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n", + "Generated JSONL file with - 2015 max words, 100 samples - at ./dataset/gen-word-2015-count.jsonl\n", + "Generated a single JSONL file with 199 samples (100 token repeat) - 2395 max words - at ./dataset/shuffle-word-2395-count.jsonl\n", + "Generated JSONL file with - 2615 max words, 100 samples - at ./dataset/gen-word-2615-count.jsonl\n", + "Generated JSONL file with - 2770 max words, 100 samples - at ./dataset/gen-word-2770-count.jsonl\n", + "Generated a single JSONL file with 199 samples (100 token repeat) - 2375 max words - at 
./dataset/shuffle-word-2375-count.jsonl\n", + "Generated a single JSONL file with 144 samples (100 token repeat) - 2600 max words - at ./dataset/shuffle-word-2600-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n", + "Generated JSONL file with - 2600 max words, 100 samples - at ./dataset/gen-word-2600-count.jsonl\n", + "Generated JSONL file with - 2560 max words, 100 samples - at ./dataset/gen-word-2560-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n", + "Generated JSONL file with - 1970 max words, 100 samples - at ./dataset/gen-word-1970-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2005 max words - at ./dataset/shuffle-word-2005-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1765 max words - at ./dataset/shuffle-word-1765-count.jsonl\n", + "Generated JSONL file with - 2125 max words, 100 samples - at ./dataset/gen-word-2125-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1555 max words - at ./dataset/shuffle-word-1555-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2105 max words - at ./dataset/shuffle-word-2105-count.jsonl\n", + "Generated JSONL file with - 2000 max words, 100 samples - at ./dataset/gen-word-2000-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n", + "Generated a single JSONL file with 37746 samples (500 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", + "Generated JSONL file with - 1960 max words, 100 samples - at ./dataset/gen-word-1960-count.jsonl\n", + "Generated JSONL file with - 2110 max words, 100 samples - at ./dataset/gen-word-2110-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n", + "Generated JSONL file with - 2160 max words, 100 samples - at ./dataset/gen-word-2160-count.jsonl\n", + "Generated JSONL file with - 2115 max words, 100 samples - at ./dataset/gen-word-2115-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n", + "Generated JSONL file with - 2595 max words, 100 samples - at ./dataset/gen-word-2595-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1795 max words - at ./dataset/shuffle-word-1795-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2215 max words - at ./dataset/shuffle-word-2215-count.jsonl\n", + "Generated JSONL file with - 2590 max words, 100 samples - at ./dataset/gen-word-2590-count.jsonl\n", + "Generated JSONL file with - 2290 max words, 100 samples - at ./dataset/gen-word-2290-count.jsonl\n", + "Generated JSONL file with - 2140 max words, 100 samples - at ./dataset/gen-word-2140-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2025 max words - at ./dataset/shuffle-word-2025-count.jsonl\n", + "Generated JSONL file with - 2200 max words, 100 samples - at ./dataset/gen-word-2200-count.jsonl\n", + "Generated JSONL file with - 
2180 max words, 100 samples - at ./dataset/gen-word-2180-count.jsonl\n", + "Generated JSONL file with - 2275 max words, 100 samples - at ./dataset/gen-word-2275-count.jsonl\n", + "Generated JSONL file with - 2575 max words, 100 samples - at ./dataset/gen-word-2575-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2015 max words - at ./dataset/shuffle-word-2015-count.jsonl\n", + "Generated a single JSONL file with 197 samples (100 token repeat) - 2305 max words - at ./dataset/shuffle-word-2305-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2000 max words - at ./dataset/shuffle-word-2000-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2165 max words - at ./dataset/shuffle-word-2165-count.jsonl\n", + "Generated a single JSONL file with 175 samples (100 token repeat) - 2435 max words - at ./dataset/shuffle-word-2435-count.jsonl\n", + "Generated JSONL file with - 1890 max words, 100 samples - at ./dataset/gen-word-1890-count.jsonl\n", + "Generated a single JSONL file with 197 samples (100 token repeat) - 2330 max words - at ./dataset/shuffle-word-2330-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (100 token repeat) - 2595 max words - at ./dataset/shuffle-word-2595-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n", + "Generated a single JSONL file with 198 samples (100 token repeat) - 2315 max words - at ./dataset/shuffle-word-2315-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n", + "Generated a single JSONL file with 183 samples (100 token repeat) - 2450 max words - at ./dataset/shuffle-word-2450-count.jsonl\n", + "Generated JSONL file with - 2070 max words, 100 samples - at ./dataset/gen-word-2070-count.jsonl\n", + "Generated JSONL file with - 2300 max words, 100 samples - at ./dataset/gen-word-2300-count.jsonl\n", + "Generated a single JSONL file with 194 samples (100 token repeat) - 2325 max words - at ./dataset/shuffle-word-2325-count.jsonl\n", + "Generated JSONL file with - 1985 max words, 100 samples - at ./dataset/gen-word-1985-count.jsonl\n", + "Generated JSONL file with - 2020 max words, 100 samples - at ./dataset/gen-word-2020-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1955 max words - at ./dataset/shuffle-word-1955-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2100 max words - at ./dataset/shuffle-word-2100-count.jsonl\n", + "Generated JSONL file with - 2075 max words, 100 samples - at ./dataset/gen-word-2075-count.jsonl\n", + "Generated JSONL file with - 2530 max words, 100 samples - at ./dataset/gen-word-2530-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2145 max words - at ./dataset/shuffle-word-2145-count.jsonl\n", + "Generated JSONL file with - 2155 max words, 100 samples - at ./dataset/gen-word-2155-count.jsonl\n", + "Generated a single JSONL file with 159 samples (100 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n", + 
"Generated JSONL file with - 1980 max words, 100 samples - at ./dataset/gen-word-1980-count.jsonl\n", + "Generated JSONL file with - 2425 max words, 100 samples - at ./dataset/gen-word-2425-count.jsonl\n", + "Generated JSONL file with - 2220 max words, 100 samples - at ./dataset/gen-word-2220-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 1915 max words - at ./dataset/shuffle-word-1915-count.jsonl\n", + "Generated a single JSONL file with 184 samples (100 token repeat) - 2415 max words - at ./dataset/shuffle-word-2415-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3045 max words - at ./dataset/shuffle-word-3045-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2055 max words - at ./dataset/shuffle-word-2055-count.jsonl\n", + "Generated JSONL file with - 2465 max words, 100 samples - at ./dataset/gen-word-2465-count.jsonl\n", + "Generated JSONL file with - 2620 max words, 100 samples - at ./dataset/gen-word-2620-count.jsonl\n", + "Generated JSONL file with - 2420 max words, 100 samples - at ./dataset/gen-word-2420-count.jsonl\n", + "Generated JSONL file with - 2355 max words, 100 samples - at ./dataset/gen-word-2355-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n", + "Generated a single JSONL file with 199 samples (100 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2265 max words - at ./dataset/shuffle-word-2265-count.jsonl\n", + "Generated a single JSONL file with 119 samples (100 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n", + "Generated JSONL file with - 2360 max words, 100 samples - at ./dataset/gen-word-2360-count.jsonl\n", + "Generated a single JSONL file with 156 samples (100 token repeat) - 2545 max words - at ./dataset/shuffle-word-2545-count.jsonl\n", + "Generated JSONL file with - 2330 max words, 100 samples - at ./dataset/gen-word-2330-count.jsonl\n", + "Generated a single JSONL file with 149 samples (100 token repeat) - 2505 max words - at ./dataset/shuffle-word-2505-count.jsonl\n", + "Generated a single JSONL file with 150 samples (100 token repeat) - 2535 max words - at ./dataset/shuffle-word-2535-count.jsonl\n", + "Generated JSONL file with - 2535 max words, 100 samples - at ./dataset/gen-word-2535-count.jsonl\n", + "Generated JSONL file with - 2090 max words, 100 samples - at ./dataset/gen-word-2090-count.jsonl\n", + "Generated JSONL file with - 2415 max words, 100 samples - at ./dataset/gen-word-2415-count.jsonl\n", + "Generated JSONL file with - 2440 max words, 100 samples - at ./dataset/gen-word-2440-count.jsonl\n", + "Generated JSONL file with - 2375 max words, 100 samples - at ./dataset/gen-word-2375-count.jsonl\n", + "Generated JSONL file with - 2430 max words, 100 samples - at ./dataset/gen-word-2430-count.jsonl\n", + "Generated JSONL file with - 2250 max words, 100 samples - at ./dataset/gen-word-2250-count.jsonl\n", + "Generated JSONL file with - 2550 max words, 100 samples - at ./dataset/gen-word-2550-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n", + "Generated a single JSONL file 
with 200 samples (100 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2885 max words - at ./dataset/shuffle-word-2885-count.jsonl\n", + "Generated a single JSONL file with 184 samples (100 token repeat) - 2445 max words - at ./dataset/shuffle-word-2445-count.jsonl\n", + "Generated JSONL file with - 2085 max words, 100 samples - at ./dataset/gen-word-2085-count.jsonl\n", + "Generated JSONL file with - 2310 max words, 100 samples - at ./dataset/gen-word-2310-count.jsonl\n", + "Generated a single JSONL file with 121 samples (100 token repeat) - 2645 max words - at ./dataset/shuffle-word-2645-count.jsonl\n", + "Generated JSONL file with - 2280 max words, 100 samples - at ./dataset/gen-word-2280-count.jsonl\n", + "Generated a single JSONL file with 150 samples (100 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n", + "Generated JSONL file with - 2235 max words, 100 samples - at ./dataset/gen-word-2235-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n", + "Generated JSONL file with - 2270 max words, 100 samples - at ./dataset/gen-word-2270-count.jsonl\n", + "Generated JSONL file with - 2190 max words, 100 samples - at ./dataset/gen-word-2190-count.jsonl\n", + "Generated a single JSONL file with 191 samples (100 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n", + "Generated JSONL file with - 2390 max words, 100 samples - at ./dataset/gen-word-2390-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2185 max words - at ./dataset/shuffle-word-2185-count.jsonl\n", + "Generated JSONL file with - 2460 max words, 100 samples - at ./dataset/gen-word-2460-count.jsonl\n", + "Generated a single JSONL file with 113 samples (100 token repeat) - 2605 max words - at ./dataset/shuffle-word-2605-count.jsonl\n", + "Generated JSONL file with - 2570 max words, 100 samples - at ./dataset/gen-word-2570-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2865 max words - at ./dataset/shuffle-word-2865-count.jsonl\n", + "Generated JSONL file with - 2655 max words, 100 samples - at ./dataset/gen-word-2655-count.jsonl\n", + "Generated JSONL file with - 2930 max words, 100 samples - at ./dataset/gen-word-2930-count.jsonl\n", + "Generated JSONL file with - 2735 max words, 100 samples - at ./dataset/gen-word-2735-count.jsonl\n", + "Generated JSONL file with - 2450 max words, 100 samples - at ./dataset/gen-word-2450-count.jsonl\n", + "Generated a single JSONL file with 179 samples (100 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n", + "Generated JSONL file with - 2475 max words, 100 samples - at ./dataset/gen-word-2475-count.jsonl\n", + "Generated a single JSONL file with 151 samples (100 token repeat) - 2585 max words - at ./dataset/shuffle-word-2585-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n", + "Generated JSONL file with - 2890 max words, 100 samples - at ./dataset/gen-word-2890-count.jsonl\n", + "Generated a single JSONL file with 152 samples (100 token repeat) - 2555 max words - at ./dataset/shuffle-word-2555-count.jsonl\n", + 
"Generated a single JSONL file with 198 samples (100 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n", + "Generated JSONL file with - 2705 max words, 100 samples - at ./dataset/gen-word-2705-count.jsonl\n", + "Generated a single JSONL file with 197 samples (100 token repeat) - 2355 max words - at ./dataset/shuffle-word-2355-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3060 max words - at ./dataset/shuffle-word-3060-count.jsonl\n", + "Generated JSONL file with - 2445 max words, 100 samples - at ./dataset/gen-word-2445-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n", + "Generated a single JSONL file with 102 samples (100 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n", + "Generated JSONL file with - 2795 max words, 100 samples - at ./dataset/gen-word-2795-count.jsonl\n", + "Generated a single JSONL file with 187 samples (100 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n", + "Generated JSONL file with - 2385 max words, 100 samples - at ./dataset/gen-word-2385-count.jsonl\n", + "Generated JSONL file with - 2650 max words, 100 samples - at ./dataset/gen-word-2650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n", + "Generated a single JSONL file with 118 samples (100 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n", + "Generated JSONL file with - 2470 max words, 100 samples - at ./dataset/gen-word-2470-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2285 max words - at ./dataset/shuffle-word-2285-count.jsonl\n", + "Generated JSONL file with - 2205 max words, 100 samples - at ./dataset/gen-word-2205-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n", + "Generated JSONL file with - 2325 max words, 100 samples - at ./dataset/gen-word-2325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2955 max words - at ./dataset/shuffle-word-2955-count.jsonl\n", + "Generated JSONL file with - 2625 max words, 100 samples - at ./dataset/gen-word-2625-count.jsonl\n", + "Generated JSONL file with - 2405 max words, 100 samples - at ./dataset/gen-word-2405-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3520 max words - at ./dataset/shuffle-word-3520-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n", + "Generated JSONL file with - 2240 max words, 100 samples - at ./dataset/gen-word-2240-count.jsonl\n", + "Generated a single JSONL file with 26337 samples (500 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2875 max words - at ./dataset/shuffle-word-2875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3540 max words - at 
./dataset/shuffle-word-3540-count.jsonl\n", + "Generated JSONL file with - 2365 max words, 100 samples - at ./dataset/gen-word-2365-count.jsonl\n", + "Generated a single JSONL file with 101 samples (100 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n", + "Generated a single JSONL file with 184 samples (100 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n", + "Generated JSONL file with - 2565 max words, 100 samples - at ./dataset/gen-word-2565-count.jsonl\n", + "Generated a single JSONL file with 148 samples (100 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n", + "Generated a single JSONL file with 117 samples (100 token repeat) - 2615 max words - at ./dataset/shuffle-word-2615-count.jsonl\n", + "Generated JSONL file with - 2605 max words, 100 samples - at ./dataset/gen-word-2605-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3510 max words - at ./dataset/shuffle-word-3510-count.jsonl\n", + "Generated a single JSONL file with 183 samples (100 token repeat) - 2405 max words - at ./dataset/shuffle-word-2405-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3345 max words - at ./dataset/shuffle-word-3345-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2890 max words - at ./dataset/shuffle-word-2890-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3090 max words - at ./dataset/shuffle-word-3090-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2965 max words - at ./dataset/shuffle-word-2965-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3530 max words - at ./dataset/shuffle-word-3530-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3080 max words - at ./dataset/shuffle-word-3080-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3505 max words - at ./dataset/shuffle-word-3505-count.jsonl\n", + "Generated JSONL file with - 3085 max words, 100 samples - at ./dataset/gen-word-3085-count.jsonl\n", + "Generated JSONL file with - 2920 max words, 100 samples - at ./dataset/gen-word-2920-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3120 max words - at ./dataset/shuffle-word-3120-count.jsonl\n", + "Generated JSONL file with - 2345 max words, 100 samples - at ./dataset/gen-word-2345-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3495 max words - at ./dataset/shuffle-word-3495-count.jsonl\n", + "Generated a single JSONL file with 200 samples (100 token repeat) - 2255 max words - at ./dataset/shuffle-word-2255-count.jsonl\n", + "Generated JSONL file with - 3545 max words, 100 samples - at ./dataset/gen-word-3545-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3170 max words - at ./dataset/shuffle-word-3170-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3515 max words - at ./dataset/shuffle-word-3515-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at ./dataset/shuffle-word-3600-count.jsonl\n", + "Generated JSONL file with - 3470 max words, 100 samples - at 
./dataset/gen-word-3470-count.jsonl\n", + "Generated a single JSONL file with 187 samples (100 token repeat) - 2495 max words - at ./dataset/shuffle-word-2495-count.jsonl\n", + "Generated a single JSONL file with 161 samples (100 token repeat) - 2525 max words - at ./dataset/shuffle-word-2525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3635 max words - at ./dataset/shuffle-word-3635-count.jsonl\n", + "Generated a single JSONL file with 13274 samples (500 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", + "Generated JSONL file with - 3475 max words, 100 samples - at ./dataset/gen-word-3475-count.jsonl\n", + "Generated JSONL file with - 3560 max words, 100 samples - at ./dataset/gen-word-3560-count.jsonl\n", + "Generated JSONL file with - 3490 max words, 100 samples - at ./dataset/gen-word-3490-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3490 max words - at ./dataset/shuffle-word-3490-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3670 max words - at ./dataset/shuffle-word-3670-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3240 max words - at ./dataset/shuffle-word-3240-count.jsonl\n", + "Generated a single JSONL file with 118 samples (100 token repeat) - 2635 max words - at ./dataset/shuffle-word-2635-count.jsonl\n", + "Generated a single JSONL file with 13999 samples (500 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n", + "Generated JSONL file with - 3535 max words, 100 samples - at ./dataset/gen-word-3535-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3105 max words - at ./dataset/shuffle-word-3105-count.jsonl\n", + "Generated JSONL file with - 3495 max words, 100 samples - at ./dataset/gen-word-3495-count.jsonl\n", + "Generated a single JSONL file with 101 samples (100 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token 
repeat) - 3590 max words - at ./dataset/shuffle-word-3590-count.jsonl\n", + "\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3355 max words - at ./dataset/shuffle-word-3355-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3735 max words - at ./dataset/shuffle-word-3735-count.jsonl\n", + "Generated JSONL file with - 3680 max words, 100 samples - at ./dataset/gen-word-3680-count.jsonl\n", + "Generated JSONL file with - 2335 max words, 100 samples - at ./dataset/gen-word-2335-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3790 max words - at ./dataset/shuffle-word-3790-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3145 max words - at ./dataset/shuffle-word-3145-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3110 max words - at ./dataset/shuffle-word-3110-count.jsonl\n", + "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n", + "Generated JSONL file with - 3655 max words, 100 samples - at ./dataset/gen-word-3655-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n", + "Generated a single JSONL file with 116 samples (100 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3615 max words - at ./dataset/shuffle-word-3615-count.jsonl\n", - "Generated JSONL file with - 2410 max words, 125 samples - at ./dataset/gen-word-2410-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3630 max words - at ./dataset/shuffle-word-3630-count.jsonl\n", - "Generated a single JSONL file with 497 samples (100 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", - "Generated JSONL file with - 1340 max words, 150 samples - at ./dataset/gen-word-1340-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n", - "Generated JSONL file with - 1445 max words, 150 samples - at ./dataset/gen-word-1445-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at ./dataset/shuffle-word-3600-count.jsonl\n", - "Generated JSONL file with - 1530 max words, 150 samples - at ./dataset/gen-word-1530-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", - "Generated JSONL file with - 1010 max words, 150 samples - at ./dataset/gen-word-1010-count.jsonl\n", - "Generated JSONL file with - 3595 max words, 100 samples - at ./dataset/gen-word-3595-count.jsonl\n", - "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", - "Generated a single JSONL file with 406 samples (100 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3570 max words - at ./dataset/shuffle-word-3570-count.jsonl\n", - "Generated a single JSONL file with 
100 samples (100 token repeat) - 3330 max words - at ./dataset/shuffle-word-3330-count.jsonl\n", - "Generated JSONL file with - 765 max words, 150 samples - at ./dataset/gen-word-765-count.jsonl\n", - "Generated JSONL file with - 3600 max words, 100 samples - at ./dataset/gen-word-3600-count.jsonl\n", - "Generated JSONL file with - 3225 max words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n", - "Generated a single JSONL file with 586 samples (100 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2895 max words - at ./dataset/shuffle-word-2895-count.jsonl\n", - "Generated JSONL file with - 3620 max words, 100 samples - at ./dataset/gen-word-3620-count.jsonl\n", - "Generated a single JSONL file with 251 samples (100 token repeat) - 1235 max words - at ./dataset/shuffle-word-1235-count.jsonl\n", - "Generated JSONL file with - 3385 max words, 100 samples - at ./dataset/gen-word-3385-count.jsonl\n", - "Generated JSONL file with - 3675 max words, 100 samples - at ./dataset/gen-word-3675-count.jsonl\n", - "Generated JSONL file with - 1355 max words, 150 samples - at ./dataset/gen-word-1355-count.jsonl\n", - "Generated a single JSONL file with 498 samples (100 token repeat) - 505 max words - at ./dataset/shuffle-word-505-count.jsonl\n", - "Generated JSONL file with - 3155 max words, 100 samples - at ./dataset/gen-word-3155-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3585 max words - at ./dataset/shuffle-word-3585-count.jsonl\n", - "Generated JSONL file with - 2590 max words, 125 samples - at ./dataset/gen-word-2590-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n", - "Generated JSONL file with - 3650 max words, 100 samples - at ./dataset/gen-word-3650-count.jsonl\n", - "Generated JSONL file with - 3635 max words, 100 samples - at ./dataset/gen-word-3635-count.jsonl\n", - "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3090 max words - at ./dataset/shuffle-word-3090-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3620 max words - at ./dataset/shuffle-word-3620-count.jsonl\n", - "Generated a single JSONL file with 404 samples (100 token repeat) - 695 max words - at ./dataset/shuffle-word-695-count.jsonl\n", - "Generated JSONL file with - 1890 max words, 150 samples - at ./dataset/gen-word-1890-count.jsonl\n", - "Generated JSONL file with - 2865 max words, 125 samples - at ./dataset/gen-word-2865-count.jsonl\n", - "Generated JSONL file with - 3420 max words, 100 samples - at ./dataset/gen-word-3420-count.jsonl\n", - "Generated JSONL file with - 3320 max words, 100 samples - at ./dataset/gen-word-3320-count.jsonl\n", - "Generated JSONL file with - 3460 max words, 100 samples - at ./dataset/gen-word-3460-count.jsonl\n", + "Generated a single JSONL file with 32726 samples (500 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", + "Generated JSONL file with - 2975 max words, 100 samples - at ./dataset/gen-word-2975-count.jsonl\n", + "Generated JSONL file with - 3510 max words, 100 samples - at ./dataset/gen-word-3510-count.jsonl\n", "Generated JSONL file with - 3540 max words, 100 samples - at ./dataset/gen-word-3540-count.jsonl\n", - "Generated JSONL file with - 1365 max 
words, 150 samples - at ./dataset/gen-word-1365-count.jsonl\n", - "Generated a single JSONL file with 1679 samples (100 token repeat) - 135 max words - at ./dataset/shuffle-word-135-count.jsonl\n", - "Generated JSONL file with - 3655 max words, 100 samples - at ./dataset/gen-word-3655-count.jsonl\n", - "Generated JSONL file with - 1165 max words, 150 samples - at ./dataset/gen-word-1165-count.jsonl\n", - "Generated JSONL file with - 3290 max words, 100 samples - at ./dataset/gen-word-3290-count.jsonl\n", - "Generated JSONL file with - 1700 max words, 150 samples - at ./dataset/gen-word-1700-count.jsonl\n", - "Generated JSONL file with - 1415 max words, 150 samples - at ./dataset/gen-word-1415-count.jsonl\n", - "Generated JSONL file with - 3605 max words, 100 samples - at ./dataset/gen-word-3605-count.jsonl\n", - "Generated JSONL file with - 3610 max words, 100 samples - at ./dataset/gen-word-3610-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n", - "Generated JSONL file with - 1175 max words, 150 samples - at ./dataset/gen-word-1175-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3085 max words - at ./dataset/shuffle-word-3085-count.jsonl\n", - "Generated JSONL file with - 1870 max words, 150 samples - at ./dataset/gen-word-1870-count.jsonl\n", - "Generated JSONL file with - 1395 max words, 150 samples - at ./dataset/gen-word-1395-count.jsonl\n", - "Generated JSONL file with - 1420 max words, 150 samples - at ./dataset/gen-word-1420-count.jsonl\n", - "Generated a single JSONL file with 204 samples (100 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", - "Generated JSONL file with - 3670 max words, 100 samples - at ./dataset/gen-word-3670-count.jsonl\n", - "Generated a single JSONL file with 201 samples (100 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", - "Generated JSONL file with - 1135 max words, 150 samples - at ./dataset/gen-word-1135-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3695 max words - at ./dataset/shuffle-word-3695-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3800 max words - at ./dataset/shuffle-word-3800-count.jsonl\n", - "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3405 max words - at ./dataset/shuffle-word-3405-count.jsonl\n", - "Generated a single JSONL file with 120 samples (100 token repeat) - 2655 max words - at ./dataset/shuffle-word-2655-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at ./dataset/shuffle-word-3650-count.jsonl\n", - "Generated JSONL file with - 1360 max words, 150 samples - at ./dataset/gen-word-1360-count.jsonl\n", - "Generated JSONL file with - 1105 max words, 150 samples - at ./dataset/gen-word-1105-count.jsonl\n", - "Generated JSONL file with - 1150 max words, 150 samples - at ./dataset/gen-word-1150-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2805 max words - at ./dataset/shuffle-word-2805-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3320 max words - at ./dataset/shuffle-word-3320-count.jsonl\n", - "Generated JSONL file with - 2430 max words, 125 samples - at ./dataset/gen-word-2430-count.jsonl\n", 
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3655 max words - at ./dataset/shuffle-word-3655-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3730 max words - at ./dataset/shuffle-word-3730-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3570 max words - at ./dataset/shuffle-word-3570-count.jsonlGenerated JSONL file with - 3705 max words, 100 samples - at ./dataset/gen-word-3705-count.jsonl\n", + "\n", + "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n", + "Generated JSONL file with - 3715 max words, 100 samples - at ./dataset/gen-word-3715-count.jsonl\n", + "Generated JSONL file with - 3780 max words, 100 samples - at ./dataset/gen-word-3780-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3545 max words - at ./dataset/shuffle-word-3545-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3555 max words - at ./dataset/shuffle-word-3555-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3705 max words - at ./dataset/shuffle-word-3705-count.jsonl\n", - "Generated a single JSONL file with 1001 samples (100 token repeat) - 225 max words - at ./dataset/shuffle-word-225-count.jsonl\n", - "Generated JSONL file with - 1650 max words, 150 samples - at ./dataset/gen-word-1650-count.jsonl\n", - "Generated a single JSONL file with 2676 samples (100 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n", - "Generated JSONL file with - 3245 max words, 100 samples - at ./dataset/gen-word-3245-count.jsonl\n", - "Generated JSONL file with - 1470 max words, 150 samples - at ./dataset/gen-word-1470-count.jsonl\n", - "Generated a single JSONL file with 1395 samples (100 token repeat) - 175 max words - at ./dataset/shuffle-word-175-count.jsonl\n", - "Generated a single JSONL file with 104 samples (100 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n", - "Generated JSONL file with - 1140 max words, 150 samples - at ./dataset/gen-word-1140-count.jsonl\n", - "Generated JSONL file with - 1110 max words, 150 samples - at ./dataset/gen-word-1110-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3190 max words - at ./dataset/shuffle-word-3190-count.jsonl\n", + "Generated JSONL file with - 3745 max words, 100 samples - at ./dataset/gen-word-3745-count.jsonlGenerated JSONL file with - 3610 max words, 100 samples - at ./dataset/gen-word-3610-count.jsonl\n", + "\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3755 max words - at ./dataset/shuffle-word-3755-count.jsonl\n", - "Generated JSONL file with - 3335 max words, 100 samples - at ./dataset/gen-word-3335-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3395 max words - at ./dataset/shuffle-word-3395-count.jsonl\n", - "Generated a 
single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", - "Generated JSONL file with - 3040 max words, 100 samples - at ./dataset/gen-word-3040-count.jsonl\n", - "Generated JSONL file with - 1720 max words, 150 samples - at ./dataset/gen-word-1720-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1715 max words - at ./dataset/shuffle-word-1715-count.jsonl\n", - "Generated JSONL file with - 1535 max words, 150 samples - at ./dataset/gen-word-1535-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3795 max words - at ./dataset/shuffle-word-3795-count.jsonl\n", - "Generated JSONL file with - 1750 max words, 150 samples - at ./dataset/gen-word-1750-count.jsonl\n", - "Generated JSONL file with - 1990 max words, 150 samples - at ./dataset/gen-word-1990-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3180 max words - at ./dataset/shuffle-word-3180-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3760 max words - at ./dataset/shuffle-word-3760-count.jsonl\n", - "Generated JSONL file with - 1055 max words, 150 samples - at ./dataset/gen-word-1055-count.jsonl\n", - "Generated JSONL file with - 3070 max words, 100 samples - at ./dataset/gen-word-3070-count.jsonl\n", - "Generated JSONL file with - 2150 max words, 125 samples - at ./dataset/gen-word-2150-count.jsonl\n", - "Generated JSONL file with - 3630 max words, 100 samples - at ./dataset/gen-word-3630-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1575 max words - at ./dataset/shuffle-word-1575-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1535 max words - at ./dataset/shuffle-word-1535-count.jsonl\n", - "Generated JSONL file with - 3265 max words, 100 samples - at ./dataset/gen-word-3265-count.jsonl\n", - "Generated JSONL file with - 655 max words, 150 samples - at ./dataset/gen-word-655-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3715 max words - at ./dataset/shuffle-word-3715-count.jsonl\n", - "Generated JSONL file with - 3805 max words, 100 samples - at ./dataset/gen-word-3805-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2015 max words - at ./dataset/shuffle-word-2015-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3710 max words - at ./dataset/shuffle-word-3710-count.jsonl\n", - "Generated JSONL file with - 3765 max words, 100 samples - at ./dataset/gen-word-3765-count.jsonl\n", - "Generated JSONL file with - 950 max words, 150 samples - at ./dataset/gen-word-950-count.jsonl\n", - "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", - "Generated JSONL file with - 1675 max words, 150 samples - at ./dataset/gen-word-1675-count.jsonl\n", - "Generated JSONL file with - 1425 max words, 150 samples - at ./dataset/gen-word-1425-count.jsonl\n", - "Generated JSONL file with - 2135 max words, 125 
samples - at ./dataset/gen-word-2135-count.jsonl\n", - "Generated JSONL file with - 1475 max words, 150 samples - at ./dataset/gen-word-1475-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n", - "Generated JSONL file with - 3705 max words, 100 samples - at ./dataset/gen-word-3705-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3685 max words - at ./dataset/shuffle-word-3685-count.jsonl\n", - "Generated a single JSONL file with 298 samples (100 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", - "Generated JSONL file with - 3745 max words, 100 samples - at ./dataset/gen-word-3745-count.jsonl\n", - "Generated JSONL file with - 1710 max words, 150 samples - at ./dataset/gen-word-1710-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1615 max words - at ./dataset/shuffle-word-1615-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3680 max words - at ./dataset/shuffle-word-3680-count.jsonl\n", - "Generated a single JSONL file with 298 samples (100 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3735 max words - at ./dataset/shuffle-word-3735-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n", - "Generated JSONL file with - 1130 max words, 150 samples - at ./dataset/gen-word-1130-count.jsonl\n", - "Generated JSONL file with - 3645 max words, 100 samples - at ./dataset/gen-word-3645-count.jsonl\n", - "Generated JSONL file with - 1290 max words, 150 samples - at ./dataset/gen-word-1290-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1655 max words - at ./dataset/shuffle-word-1655-count.jsonl\n", - "Generated JSONL file with - 2595 max words, 125 samples - at ./dataset/gen-word-2595-count.jsonl\n", - "Generated JSONL file with - 1095 max words, 150 samples - at ./dataset/gen-word-1095-count.jsonl\n", - "Generated a single JSONL file with 1503 samples (100 token repeat) - 155 max words - at ./dataset/shuffle-word-155-count.jsonl\n", - "Generated JSONL file with - 3680 max words, 100 samples - at ./dataset/gen-word-3680-count.jsonl\n", - "Generated JSONL file with - 1390 max words, 150 samples - at ./dataset/gen-word-1390-count.jsonl\n", - "Generated a single JSONL file with 104 samples (100 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n", - "Generated JSONL file with - 1575 max words, 150 samples - at ./dataset/gen-word-1575-count.jsonl\n", - "Generated JSONL file with - 1580 max words, 150 samples - at ./dataset/gen-word-1580-count.jsonl\n", - "Generated a single JSONL file with 203 samples (100 token repeat) - 1325 max words - at ./dataset/shuffle-word-1325-count.jsonl\n", - "Generated JSONL file with - 1810 max words, 150 samples - at ./dataset/gen-word-1810-count.jsonl\n", - "Generated JSONL file with - 2560 max words, 125 samples - at ./dataset/gen-word-2560-count.jsonl\n", - "Generated a single JSONL file with 256 samples (100 token repeat) - 1285 max words - at ./dataset/shuffle-word-1285-count.jsonl\n", - "Generated JSONL file with - 1250 max words, 150 samples - at ./dataset/gen-word-1250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max 
words - at ./dataset/shuffle-word-3475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3480 max words - at ./dataset/shuffle-word-3480-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n", "Generated JSONL file with - 3735 max words, 100 samples - at ./dataset/gen-word-3735-count.jsonl\n", - "Generated JSONL file with - 1235 max words, 150 samples - at ./dataset/gen-word-1235-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1995 max words - at ./dataset/shuffle-word-1995-count.jsonl\n", + "Generated JSONL file with - 3095 max words, 100 samples - at ./dataset/gen-word-3095-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3005 max words - at ./dataset/shuffle-word-3005-count.jsonl\n", + "Generated JSONL file with - 3765 max words, 100 samples - at ./dataset/gen-word-3765-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3155 max words - at ./dataset/shuffle-word-3155-count.jsonl\n", + "Generated JSONL file with - 3015 max words, 100 samples - at ./dataset/gen-word-3015-count.jsonl\n", + "Generated JSONL file with - 3440 max words, 100 samples - at ./dataset/gen-word-3440-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3780 max words - at ./dataset/shuffle-word-3780-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n", + "Generated JSONL file with - 3045 max words, 100 samples - at ./dataset/gen-word-3045-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3630 max words - at ./dataset/shuffle-word-3630-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3305 max words - at ./dataset/shuffle-word-3305-count.jsonl\n", "Generated JSONL file with - 3775 max words, 100 samples - at ./dataset/gen-word-3775-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n", - "Generated JSONL file with - 595 max words, 150 samples - at ./dataset/gen-word-595-count.jsonl\n", - "Generated JSONL file with - 3715 max words, 100 samples - at ./dataset/gen-word-3715-count.jsonl\n", - "Generated JSONL file with - 3720 max words, 100 samples - at ./dataset/gen-word-3720-count.jsonl\n", - "Generated JSONL file with - 1640 max words, 150 samples - at ./dataset/gen-word-1640-count.jsonl\n", - "Generated JSONL file with - 1505 max words, 150 samples - at ./dataset/gen-word-1505-count.jsonl\n", - "Generated JSONL file with - 2770 max words, 125 samples - at ./dataset/gen-word-2770-count.jsonl\n", - "Generated JSONL file with - 1210 max words, 150 samples - at ./dataset/gen-word-1210-count.jsonl\n", - "Generated a single JSONL file with 590 samples (100 token repeat) - 425 max words - at ./dataset/shuffle-word-425-count.jsonl\n", - "Generated JSONL file with - 2455 max words, 125 samples - at ./dataset/gen-word-2455-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1525 max words - at ./dataset/shuffle-word-1525-count.jsonl\n", + "Generated JSONL file with - 3710 max words, 100 samples - at 
./dataset/gen-word-3710-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3535 max words - at ./dataset/shuffle-word-3535-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3740 max words - at ./dataset/shuffle-word-3740-count.jsonl\n",
+ "Generated JSONL file with - 3100 max words, 100 samples - at ./dataset/gen-word-3100-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3605 max words - at ./dataset/shuffle-word-3605-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3620 max words - at ./dataset/shuffle-word-3620-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3785 max words - at ./dataset/shuffle-word-3785-count.jsonl\n",
+ "Generated a single JSONL file with 113 samples (100 token repeat) - 2655 max words - at ./dataset/shuffle-word-2655-count.jsonl\n",
+ "Generated JSONL file with - 3785 max words, 100 samples - at ./dataset/gen-word-3785-count.jsonl\n",
+ "Generated JSONL file with - 3795 max words, 100 samples - at ./dataset/gen-word-3795-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3640 max words - at ./dataset/shuffle-word-3640-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3485 max words - at ./dataset/shuffle-word-3485-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3710 max words - at ./dataset/shuffle-word-3710-count.jsonl\n",
+ "Generated JSONL file with - 3630 max words, 100 samples - at ./dataset/gen-word-3630-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3795 max words - at ./dataset/shuffle-word-3795-count.jsonl\n",
+ "Generated JSONL file with - 3565 max words, 100 samples - at ./dataset/gen-word-3565-count.jsonl\n",
+ "Generated JSONL file with - 3605 max words, 100 samples - at ./dataset/gen-word-3605-count.jsonl\n",
+ "Generated JSONL file with - 3635 max words, 100 samples - at ./dataset/gen-word-3635-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at ./dataset/shuffle-word-3675-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3695 max words - at ./dataset/shuffle-word-3695-count.jsonl\n",
+ "Generated a single JSONL file with 115 samples (100 token repeat) - 2675 max words - at ./dataset/shuffle-word-2675-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3800 max words - at ./dataset/shuffle-word-3800-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3645 max words - at ./dataset/shuffle-word-3645-count.jsonl\n",
+ "Generated JSONL file with - 3555 max words, 100 samples - at ./dataset/gen-word-3555-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3125 max words - at ./dataset/shuffle-word-3125-count.jsonl\n",
- "Generated JSONL file with - 2835 max words, 125 samples - at ./dataset/gen-word-2835-count.jsonl\n",
- "Generated a single JSONL file with 26068 samples (100 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n",
- "Generated JSONL file with - 2860 max words, 125 samples - at ./dataset/gen-word-2860-count.jsonl\n",
- "Generated a single JSONL file with 200 
samples (100 token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n", - "Generated a single JSONL file with 202 samples (100 token repeat) - 1365 max words - at ./dataset/shuffle-word-1365-count.jsonl\n", - "Generated JSONL file with - 1220 max words, 150 samples - at ./dataset/gen-word-1220-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1015 max words - at ./dataset/shuffle-word-1015-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", - "Generated a single JSONL file with 299 samples (100 token repeat) - 1175 max words - at ./dataset/shuffle-word-1175-count.jsonl\n", - "Generated a single JSONL file with 1441 samples (100 token repeat) - 165 max words - at ./dataset/shuffle-word-165-count.jsonl\n", - "Generated a single JSONL file with 149 samples (100 token repeat) - 2595 max words - at ./dataset/shuffle-word-2595-count.jsonl\n", - "Generated JSONL file with - 1460 max words, 150 samples - at ./dataset/gen-word-1460-count.jsonl\n", - "Generated a single JSONL file with 500 samples (100 token repeat) - 555 max words - at ./dataset/shuffle-word-555-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3855 max words - at ./dataset/shuffle-word-3855-count.jsonl\n", - "Generated a single JSONL file with 1094 samples (100 token repeat) - 205 max words - at ./dataset/shuffle-word-205-count.jsonl\n", - "Generated JSONL file with - 1255 max words, 150 samples - at ./dataset/gen-word-1255-count.jsonl\n", - "Generated JSONL file with - 1765 max words, 150 samples - at ./dataset/gen-word-1765-count.jsonl\n", - "Generated a single JSONL file with 189 samples (100 token repeat) - 2465 max words - at ./dataset/shuffle-word-2465-count.jsonl\n", - "Generated a single JSONL file with 205 samples (100 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", - "Generated a single JSONL file with 298 samples (100 token repeat) - 1105 max words - at ./dataset/shuffle-word-1105-count.jsonl\n", - "Generated JSONL file with - 1660 max words, 150 samples - at ./dataset/gen-word-1660-count.jsonl\n", - "Generated JSONL file with - 1590 max words, 150 samples - at ./dataset/gen-word-1590-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1505 max words - at ./dataset/shuffle-word-1505-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1625 max words - at ./dataset/shuffle-word-1625-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3880 max words - at ./dataset/shuffle-word-3880-count.jsonl\n", - "Generated JSONL file with - 1050 max words, 150 samples - at ./dataset/gen-word-1050-count.jsonl\n", - "Generated JSONL file with - 2200 max words, 125 samples - at ./dataset/gen-word-2200-count.jsonl\n", - "Generated JSONL file with - 3690 max words, 100 samples - at ./dataset/gen-word-3690-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", - "Generated JSONL file with - 1995 max words, 150 samples - at ./dataset/gen-word-1995-count.jsonl\n", - "Generated JSONL file with - 1435 max words, 150 samples - at ./dataset/gen-word-1435-count.jsonl\n", - "Generated a single JSONL file with 10607 samples (100 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n", - "Generated a single JSONL 
file with 300 samples (100 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", - "Generated JSONL file with - 1155 max words, 150 samples - at ./dataset/gen-word-1155-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1665 max words - at ./dataset/shuffle-word-1665-count.jsonl\n", - "Generated a single JSONL file with 2783 samples (100 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1005 max words - at ./dataset/shuffle-word-1005-count.jsonl\n", - "Generated JSONL file with - 2740 max words, 125 samples - at ./dataset/gen-word-2740-count.jsonl\n", - "Generated JSONL file with - 1275 max words, 150 samples - at ./dataset/gen-word-1275-count.jsonl\n", - "Generated JSONL file with - 1215 max words, 150 samples - at ./dataset/gen-word-1215-count.jsonl\n", - "Generated JSONL file with - 1410 max words, 150 samples - at ./dataset/gen-word-1410-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1475 max words - at ./dataset/shuffle-word-1475-count.jsonl\n", - "Generated JSONL file with - 1570 max words, 150 samples - at ./dataset/gen-word-1570-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1805 max words - at ./dataset/shuffle-word-1805-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n", - "Generated JSONL file with - 3910 max words, 100 samples - at ./dataset/gen-word-3910-count.jsonl\n", - "Generated JSONL file with - 1770 max words, 150 samples - at ./dataset/gen-word-1770-count.jsonl\n", - "Generated JSONL file with - 1400 max words, 150 samples - at ./dataset/gen-word-1400-count.jsonl\n", - "Generated JSONL file with - 1310 max words, 150 samples - at ./dataset/gen-word-1310-count.jsonl\n", - "Generated JSONL file with - 3230 max words, 100 samples - at ./dataset/gen-word-3230-count.jsonl\n", - "Generated a single JSONL file with 249 samples (100 token repeat) - 1225 max words - at ./dataset/shuffle-word-1225-count.jsonl\n", - "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n", - "Generated JSONL file with - 3005 max words, 100 samples - at ./dataset/gen-word-3005-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2135 max words - at ./dataset/shuffle-word-2135-count.jsonl\n", "Generated JSONL file with - 3640 max words, 100 samples - at ./dataset/gen-word-3640-count.jsonl\n", - "Generated a single JSONL file with 298 samples (100 token repeat) - 1190 max words - at ./dataset/shuffle-word-1190-count.jsonl\n", - "Generated a single JSONL file with 185 samples (100 token repeat) - 2500 max words - at 
./dataset/shuffle-word-2500-count.jsonl\n", - "Generated JSONL file with - 3895 max words, 100 samples - at ./dataset/gen-word-3895-count.jsonl\n", - "Generated JSONL file with - 3055 max words, 100 samples - at ./dataset/gen-word-3055-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3840 max words - at ./dataset/shuffle-word-3840-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", - "Generated a single JSONL file with 251 samples (100 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n", - "Generated JSONL file with - 3840 max words, 100 samples - at ./dataset/gen-word-3840-count.jsonl\n", - "Generated JSONL file with - 3685 max words, 100 samples - at ./dataset/gen-word-3685-count.jsonl\n", - "Generated a single JSONL file with 251 samples (100 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", - "Generated a single JSONL file with 254 samples (100 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", - "Generated a single JSONL file with 246 samples (100 token repeat) - 1255 max words - at ./dataset/shuffle-word-1255-count.jsonl\n", - "Generated JSONL file with - 3860 max words, 100 samples - at ./dataset/gen-word-3860-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1455 max words - at ./dataset/shuffle-word-1455-count.jsonl\n", - "Generated JSONL file with - 3565 max words, 100 samples - at ./dataset/gen-word-3565-count.jsonl\n", - "Generated a single JSONL file with 259 samples (100 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1495 max words - at ./dataset/shuffle-word-1495-count.jsonl\n", - "Generated JSONL file with - 1270 max words, 150 samples - at ./dataset/gen-word-1270-count.jsonl\n", - "Generated a single JSONL file with 152 samples (100 token repeat) - 2565 max words - at ./dataset/shuffle-word-2565-count.jsonl\n", - "Generated JSONL file with - 1070 max words, 150 samples - at ./dataset/gen-word-1070-count.jsonl\n", - "Generated JSONL file with - 1510 max words, 150 samples - at ./dataset/gen-word-1510-count.jsonl\n", + "Generated JSONL file with - 3730 max words, 100 samples - at ./dataset/gen-word-3730-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3680 max words - at ./dataset/shuffle-word-3680-count.jsonl\n", + "Generated JSONL file with - 3740 max words, 100 samples - at ./dataset/gen-word-3740-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3595 max words - at ./dataset/shuffle-word-3595-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3745 max words - at ./dataset/shuffle-word-3745-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3580 max words - at ./dataset/shuffle-word-3580-count.jsonl\n", + "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", + "Generated JSONL file with - 3485 max words, 100 samples - at 
./dataset/gen-word-3485-count.jsonl\n", + "Generated JSONL file with - 3665 max words, 100 samples - at ./dataset/gen-word-3665-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3765 max words - at ./dataset/shuffle-word-3765-count.jsonl\n", + "Generated JSONL file with - 3600 max words, 100 samples - at ./dataset/gen-word-3600-count.jsonl\n", + "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", "Generated JSONL file with - 3590 max words, 100 samples - at ./dataset/gen-word-3590-count.jsonl\n", - "Generated a single JSONL file with 183 samples (100 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1415 max words - at ./dataset/shuffle-word-1415-count.jsonl\n", - "Generated JSONL file with - 1385 max words, 150 samples - at ./dataset/gen-word-1385-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3890 max words - at ./dataset/shuffle-word-3890-count.jsonl\n", - "Generated JSONL file with - 3710 max words, 100 samples - at ./dataset/gen-word-3710-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3905 max words - at ./dataset/shuffle-word-3905-count.jsonl\n", - "Generated a single JSONL file with 298 samples (100 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1975 max words - at ./dataset/shuffle-word-1975-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n", - "Generated JSONL file with - 1615 max words, 150 samples - at ./dataset/gen-word-1615-count.jsonl\n", - "Generated JSONL file with - 2990 max words, 125 samples - at ./dataset/gen-word-2990-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3410 max words - at ./dataset/shuffle-word-3410-count.jsonl\n", - "Generated JSONL file with - 1030 max words, 150 samples - at ./dataset/gen-word-1030-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n", - "Generated a single JSONL file with 248 samples (100 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", - "Generated JSONL file with - 1695 max words, 150 samples - at ./dataset/gen-word-1695-count.jsonl\n", - "Generated JSONL file with - 1370 max words, 150 samples - at ./dataset/gen-word-1370-count.jsonl\n", - "Generated JSONL file with - 2830 max words, 125 samples - at ./dataset/gen-word-2830-count.jsonl\n", - "Generated a single JSONL file with 401 samples (100 token repeat) - 625 max words - at ./dataset/shuffle-word-625-count.jsonl\n", - "Generated JSONL file with - 2955 max words, 125 samples - at ./dataset/gen-word-2955-count.jsonl\n", - "Generated a single JSONL file with 500 samples (100 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", - 
"Generated a single JSONL file with 200 samples (100 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n", - "Generated JSONL file with - 1335 max words, 150 samples - at ./dataset/gen-word-1335-count.jsonl\n", - "Generated JSONL file with - 1260 max words, 150 samples - at ./dataset/gen-word-1260-count.jsonl\n", - "Generated JSONL file with - 1245 max words, 150 samples - at ./dataset/gen-word-1245-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3965 max words - at ./dataset/shuffle-word-3965-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3565 max words - at ./dataset/shuffle-word-3565-count.jsonl\n", + "Generated a single JSONL file with 186 samples (100 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3715 max words - at ./dataset/shuffle-word-3715-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3660 max words - at ./dataset/shuffle-word-3660-count.jsonl\n", + "Generated a single JSONL file with 186 samples (100 token repeat) - 2485 max words - at ./dataset/shuffle-word-2485-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3585 max words - at ./dataset/shuffle-word-3585-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3190 max words - at ./dataset/shuffle-word-3190-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3625 max words - at ./dataset/shuffle-word-3625-count.jsonl\n", + "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3980 max words - at ./dataset/shuffle-word-3980-count.jsonl\n", - "Generated a single JSONL file with 298 samples (100 token repeat) - 1115 max words - at ./dataset/shuffle-word-1115-count.jsonl\n", - "Generated JSONL file with - 1295 max words, 150 samples - at ./dataset/gen-word-1295-count.jsonl\n", - "Generated a single JSONL file with 202 samples (100 token repeat) - 1385 max words - at ./dataset/shuffle-word-1385-count.jsonl\n", - "Generated JSONL file with - 1200 max words, 150 samples - at ./dataset/gen-word-1200-count.jsonl\n", - "Generated JSONL file with - 2295 max words, 125 samples - at ./dataset/gen-word-2295-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2155 max words - at ./dataset/shuffle-word-2155-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3945 max words - at ./dataset/shuffle-word-3945-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3470 max words - at ./dataset/shuffle-word-3470-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", - "Generated JSONL file with - 2020 max words, 125 samples - at ./dataset/gen-word-2020-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n", - "Generated a single JSONL file with 296 samples (100 token repeat) - 1125 max words - at ./dataset/shuffle-word-1125-count.jsonl\n", - "Generated a single 
JSONL file with 100 samples (100 token repeat) - 3970 max words - at ./dataset/shuffle-word-3970-count.jsonl\n", - "Generated a single JSONL file with 205 samples (100 token repeat) - 1335 max words - at ./dataset/shuffle-word-1335-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n", - "Generated JSONL file with - 2070 max words, 125 samples - at ./dataset/gen-word-2070-count.jsonl\n", - "Generated a single JSONL file with 703 samples (100 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", - "Generated a single JSONL file with 198 samples (100 token repeat) - 2305 max words - at ./dataset/shuffle-word-2305-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonl\n", - "Generated JSONL file with - 3130 max words, 100 samples - at ./dataset/gen-word-3130-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3590 max words - at ./dataset/shuffle-word-3590-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3985 max words - at ./dataset/shuffle-word-3985-count.jsonl\n", - "Generated JSONL file with - 2140 max words, 125 samples - at ./dataset/gen-word-2140-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2275 max words - at ./dataset/shuffle-word-2275-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2235 max words - at ./dataset/shuffle-word-2235-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n", - "Generated JSONL file with - 3380 max words, 100 samples - at ./dataset/gen-word-3380-count.jsonl\n", - "Generated JSONL file with - 1300 max words, 150 samples - at ./dataset/gen-word-1300-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", - "Generated JSONL file with - 3185 max words, 100 samples - at ./dataset/gen-word-3185-count.jsonl\n", - "Generated JSONL file with - 1040 max words, 150 samples - at ./dataset/gen-word-1040-count.jsonl\n", - "Generated JSONL file with - 1495 max words, 150 samples - at ./dataset/gen-word-1495-count.jsonl\n", - "Generated JSONL file with - 1430 max words, 150 samples - at ./dataset/gen-word-1430-count.jsonl\n", - "Generated JSONL file with - 1625 max words, 150 samples - at ./dataset/gen-word-1625-count.jsonl\n", - "Generated a single JSONL file with 198 samples (100 token repeat) - 2335 max words - at ./dataset/shuffle-word-2335-count.jsonl\n", - "Generated a single JSONL file with 1369 samples (100 token repeat) - 185 max words - at ./dataset/shuffle-word-185-count.jsonl\n", - "Generated JSONL file with - 3970 max words, 100 samples - at ./dataset/gen-word-3970-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n", - "Generated a single JSONL file with 248 samples (100 token repeat) - 1245 max words - at ./dataset/shuffle-word-1245-count.jsonl\n", - "Generated a single JSONL file with 198 samples (100 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n", - 
"Generated a single JSONL file with 200 samples (100 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", - "Generated a single JSONL file with 202 samples (100 token repeat) - 1395 max words - at ./dataset/shuffle-word-1395-count.jsonl\n", - "Generated JSONL file with - 2945 max words, 125 samples - at ./dataset/gen-word-2945-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n", - "Generated JSONL file with - 1875 max words, 150 samples - at ./dataset/gen-word-1875-count.jsonl\n", - "Generated JSONL file with - 3165 max words, 100 samples - at ./dataset/gen-word-3165-count.jsonl\n", - "Generated JSONL file with - 1855 max words, 150 samples - at ./dataset/gen-word-1855-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1085 max words - at ./dataset/shuffle-word-1085-count.jsonl\n", - "Generated a single JSONL file with 202 samples (100 token repeat) - 1355 max words - at ./dataset/shuffle-word-1355-count.jsonl\n", - "Generated a single JSONL file with 248 samples (100 token repeat) - 1295 max words - at ./dataset/shuffle-word-1295-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3455 max words - at ./dataset/shuffle-word-3455-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3990 max words - at ./dataset/shuffle-word-3990-count.jsonl\n", - "Generated JSONL file with - 1830 max words, 150 samples - at ./dataset/gen-word-1830-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2145 max words - at ./dataset/shuffle-word-2145-count.jsonl\n", - "Generated JSONL file with - 3965 max words, 100 samples - at ./dataset/gen-word-3965-count.jsonl\n", - "Generated JSONL file with - 1645 max words, 150 samples - at ./dataset/gen-word-1645-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n", - "Generated a single JSONL file with 246 samples (100 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2205 max words - at ./dataset/shuffle-word-2205-count.jsonl\n", - "Generated JSONL file with - 2230 max words, 125 samples - at ./dataset/gen-word-2230-count.jsonl\n", - "Generated JSONL file with - 1670 max words, 150 samples - at ./dataset/gen-word-1670-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2905 max words - at ./dataset/shuffle-word-2905-count.jsonl\n", - "Generated a single JSONL file with 188 samples (100 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n", - "Generated JSONL file with - 1115 max words, 150 samples - at ./dataset/gen-word-1115-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2125 max words - at ./dataset/shuffle-word-2125-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2280 max words - at ./dataset/shuffle-word-2280-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2255 max words - at ./dataset/shuffle-word-2255-count.jsonl\n", - "Generated JSONL file with - 3975 max words, 100 samples - at 
./dataset/gen-word-3975-count.jsonl\n", - "Generated a single JSONL file with 115 samples (100 token repeat) - 2665 max words - at ./dataset/shuffle-word-2665-count.jsonl\n", - "Generated JSONL file with - 1375 max words, 150 samples - at ./dataset/gen-word-1375-count.jsonl\n", - "Generated JSONL file with - 1560 max words, 150 samples - at ./dataset/gen-word-1560-count.jsonl\n", - "Generated a single JSONL file with 401 samples (100 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3785 max words - at ./dataset/shuffle-word-3785-count.jsonl\n", - "Generated a single JSONL file with 299 samples (100 token repeat) - 1165 max words - at ./dataset/shuffle-word-1165-count.jsonl\n", - "Generated JSONL file with - 1965 max words, 150 samples - at ./dataset/gen-word-1965-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3135 max words - at ./dataset/shuffle-word-3135-count.jsonl\n", - "Generated JSONL file with - 3695 max words, 100 samples - at ./dataset/gen-word-3695-count.jsonl\n", - "Generated a single JSONL file with 103 samples (100 token repeat) - 2725 max words - at ./dataset/shuffle-word-2725-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1155 max words - at ./dataset/shuffle-word-1155-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3610 max words - at ./dataset/shuffle-word-3610-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2265 max words - at ./dataset/shuffle-word-2265-count.jsonl\n", - "Generated a single JSONL file with 201 samples (100 token repeat) - 1315 max words - at ./dataset/shuffle-word-1315-count.jsonl\n", - "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", - "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", - "Generated a single JSONL file with 116 samples (100 token repeat) - 2635 max words - at ./dataset/shuffle-word-2635-count.jsonl\n", - "Generated JSONL file with - 1380 max words, 150 samples - at ./dataset/gen-word-1380-count.jsonl\n", - "Generated a single JSONL file with 203 samples (100 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1075 max words - at ./dataset/shuffle-word-1075-count.jsonl\n", - "Generated JSONL file with - 1865 max words, 150 samples - at ./dataset/gen-word-1865-count.jsonl\n", - "Generated JSONL file with - 3905 max words, 100 samples - at ./dataset/gen-word-3905-count.jsonl\n", - "Generated a single JSONL file with 246 samples (100 token repeat) - 1205 max words - at ./dataset/shuffle-word-1205-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n", - "Generated JSONL file with - 2530 max words, 125 samples - at ./dataset/gen-word-2530-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", - "Generated JSONL file with - 3870 max words, 100 samples - at ./dataset/gen-word-3870-count.jsonl\n", - "Generated a single JSONL file with 297 samples (100 token repeat) - 1160 
max words - at ./dataset/shuffle-word-1160-count.jsonl\n", - "Generated JSONL file with - 1100 max words, 150 samples - at ./dataset/gen-word-1100-count.jsonl\n", - "Generated JSONL file with - 3835 max words, 100 samples - at ./dataset/gen-word-3835-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1765 max words - at ./dataset/shuffle-word-1765-count.jsonl\n", - "Generated JSONL file with - 2400 max words, 125 samples - at ./dataset/gen-word-2400-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1940 max words - at ./dataset/shuffle-word-1940-count.jsonl\n", - "Generated JSONL file with - 1280 max words, 150 samples - at ./dataset/gen-word-1280-count.jsonl\n", - "Generated JSONL file with - 1520 max words, 150 samples - at ./dataset/gen-word-1520-count.jsonl\n", - "Generated a single JSONL file with 399 samples (100 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1735 max words - at ./dataset/shuffle-word-1735-count.jsonl\n", - "Generated JSONL file with - 1775 max words, 150 samples - at ./dataset/gen-word-1775-count.jsonl\n", - "Generated a single JSONL file with 499 samples (100 token repeat) - 565 max words - at ./dataset/shuffle-word-565-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3685 max words - at ./dataset/shuffle-word-3685-count.jsonl\n", + "Generated JSONL file with - 2485 max words, 100 samples - at ./dataset/gen-word-2485-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3245 max words - at ./dataset/shuffle-word-3245-count.jsonl\n", "Generated JSONL file with - 3885 max words, 100 samples - at ./dataset/gen-word-3885-count.jsonl\n", - "Generated JSONL file with - 1610 max words, 150 samples - at ./dataset/gen-word-1610-count.jsonl\n", - "Generated JSONL file with - 2420 max words, 125 samples - at ./dataset/gen-word-2420-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", - "Generated JSONL file with - 1760 max words, 150 samples - at ./dataset/gen-word-1760-count.jsonl\n", - "Generated JSONL file with - 1045 max words, 150 samples - at ./dataset/gen-word-1045-count.jsonl\n", - "Generated JSONL file with - 1685 max words, 150 samples - at ./dataset/gen-word-1685-count.jsonl\n", - "Generated a single JSONL file with 296 samples (100 token repeat) - 1195 max words - at ./dataset/shuffle-word-1195-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1885 max words - at ./dataset/shuffle-word-1885-count.jsonl\n", - "Generated JSONL file with - 1485 max words, 150 samples - at ./dataset/gen-word-1485-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1405 max words - at ./dataset/shuffle-word-1405-count.jsonl\n", - "Generated JSONL file with - 2115 max words, 125 samples - at ./dataset/gen-word-2115-count.jsonl\n", - "Generated JSONL file with - 3410 max words, 100 samples - at ./dataset/gen-word-3410-count.jsonl\n", - "Generated a single JSONL file with 587 samples (100 token repeat) - 455 max words - at ./dataset/shuffle-word-455-count.jsonl\n", - "Generated JSONL file with - 2120 max words, 125 samples - at ./dataset/gen-word-2120-count.jsonl\n", - "Generated JSONL file with - 1605 max words, 150 samples - at ./dataset/gen-word-1605-count.jsonl\n", - 
"Generated a single JSONL file with 201 samples (100 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3995 max words - at ./dataset/shuffle-word-3995-count.jsonl\n", - "Generated JSONL file with - 3560 max words, 100 samples - at ./dataset/gen-word-3560-count.jsonl\n", - "Generated JSONL file with - 1465 max words, 150 samples - at ./dataset/gen-word-1465-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1565 max words - at ./dataset/shuffle-word-1565-count.jsonl\n", - "Generated a single JSONL file with 198 samples (100 token repeat) - 2400 max words - at ./dataset/shuffle-word-2400-count.jsonl\n", - "Generated JSONL file with - 2355 max words, 125 samples - at ./dataset/gen-word-2355-count.jsonl\n", - "Generated JSONL file with - 2205 max words, 125 samples - at ./dataset/gen-word-2205-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", - "Generated a single JSONL file with 186 samples (100 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n", - "Generated JSONL file with - 2085 max words, 125 samples - at ./dataset/gen-word-2085-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1965 max words - at ./dataset/shuffle-word-1965-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", - "Generated JSONL file with - 1920 max words, 150 samples - at ./dataset/gen-word-1920-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", - "Generated JSONL file with - 3990 max words, 100 samples - at ./dataset/gen-word-3990-count.jsonl\n", - "Generated JSONL file with - 1160 max words, 150 samples - at ./dataset/gen-word-1160-count.jsonl\n", - "Generated JSONL file with - 2245 max words, 125 samples - at ./dataset/gen-word-2245-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2295 max words - at ./dataset/shuffle-word-2295-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", - "Generated JSONL file with - 2335 max words, 125 samples - at ./dataset/gen-word-2335-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1945 max words - at ./dataset/shuffle-word-1945-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2085 max words - at ./dataset/shuffle-word-2085-count.jsonl\n", - "Generated JSONL file with - 1450 max words, 150 samples - at ./dataset/gen-word-1450-count.jsonl\n", - "Generated a single JSONL file with 203 samples (100 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1725 max words - at ./dataset/shuffle-word-1725-count.jsonl\n", + "Generated JSONL file with - 3580 max words, 100 samples - at ./dataset/gen-word-3580-count.jsonl\n", "Generated 
JSONL file with - 3575 max words, 100 samples - at ./dataset/gen-word-3575-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", - "Generated a single JSONL file with 147 samples (100 token repeat) - 2505 max words - at ./dataset/shuffle-word-2505-count.jsonl\n", - "Generated JSONL file with - 2625 max words, 125 samples - at ./dataset/gen-word-2625-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2815 max words - at ./dataset/shuffle-word-2815-count.jsonl\n", - "Generated JSONL file with - 2390 max words, 125 samples - at ./dataset/gen-word-2390-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n", - "Generated a single JSONL file with 189 samples (100 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n", - "Generated JSONL file with - 2730 max words, 125 samples - at ./dataset/gen-word-2730-count.jsonl\n", - "Generated JSONL file with - 2925 max words, 125 samples - at ./dataset/gen-word-2925-count.jsonl\n", - "Generated a single JSONL file with 580 samples (100 token repeat) - 465 max words - at ./dataset/shuffle-word-465-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1755 max words - at ./dataset/shuffle-word-1755-count.jsonl\n", - "Generated a single JSONL file with 931 samples (100 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", - "Generated JSONL file with - 2645 max words, 125 samples - at ./dataset/gen-word-2645-count.jsonl\n", - "Generated JSONL file with - 1790 max words, 150 samples - at ./dataset/gen-word-1790-count.jsonl\n", - "Generated JSONL file with - 2735 max words, 125 samples - at ./dataset/gen-word-2735-count.jsonl\n", - "Generated a single JSONL file with 253 samples (100 token repeat) - 1275 max words - at ./dataset/shuffle-word-1275-count.jsonl\n", - "Generated JSONL file with - 2370 max words, 125 samples - at ./dataset/gen-word-2370-count.jsonl\n", - "Generated JSONL file with - 1540 max words, 150 samples - at ./dataset/gen-word-1540-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1435 max words - at ./dataset/shuffle-word-1435-count.jsonl\n", - "Generated JSONL file with - 1285 max words, 150 samples - at ./dataset/gen-word-1285-count.jsonl\n", - "Generated JSONL file with - 2465 max words, 125 samples - at ./dataset/gen-word-2465-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n", - "Generated JSONL file with - 1745 max words, 150 samples - at ./dataset/gen-word-1745-count.jsonl\n", - "Generated JSONL file with - 2005 max words, 125 samples - at ./dataset/gen-word-2005-count.jsonl\n", - "Generated a single JSONL file with 254 samples (100 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", - "Generated a single JSONL file with 404 samples (100 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", - "Generated JSONL file with - 1225 max words, 150 samples - at 
./dataset/gen-word-1225-count.jsonl\n", - "Generated a single JSONL file with 299 samples (100 token repeat) - 1135 max words - at ./dataset/shuffle-word-1135-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3485 max words - at ./dataset/shuffle-word-3485-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n", - "Generated JSONL file with - 2275 max words, 125 samples - at ./dataset/gen-word-2275-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1145 max words - at ./dataset/shuffle-word-1145-count.jsonl\n", - "Generated a single JSONL file with 154 samples (100 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n", - "Generated JSONL file with - 2550 max words, 125 samples - at ./dataset/gen-word-2550-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", - "Generated a single JSONL file with 202 samples (100 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3770 max words - at ./dataset/shuffle-word-3770-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3560 max words - at ./dataset/shuffle-word-3560-count.jsonl\n", "Generated JSONL file with - 3820 max words, 100 samples - at ./dataset/gen-word-3820-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1785 max words - at ./dataset/shuffle-word-1785-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1595 max words - at ./dataset/shuffle-word-1595-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3540 max words - at ./dataset/shuffle-word-3540-count.jsonl\n", - "Generated a single JSONL file with 255 samples (100 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", - "Generated JSONL file with - 2415 max words, 125 samples - at ./dataset/gen-word-2415-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2995 max words - at ./dataset/shuffle-word-2995-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n", - "Generated JSONL file with - 1655 max words, 150 samples - at ./dataset/gen-word-1655-count.jsonl\n", - "Generated JSONL file with - 1900 max words, 150 samples - at ./dataset/gen-word-1900-count.jsonl\n", - "Generated JSONL file with - 3215 max words, 100 
samples - at ./dataset/gen-word-3215-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3860 max words - at ./dataset/shuffle-word-3860-count.jsonl\n", - "Generated JSONL file with - 3060 max words, 100 samples - at ./dataset/gen-word-3060-count.jsonl\n", - "Generated JSONL file with - 2175 max words, 125 samples - at ./dataset/gen-word-2175-count.jsonl\n", - "Generated JSONL file with - 1980 max words, 150 samples - at ./dataset/gen-word-1980-count.jsonl\n", - "Generated JSONL file with - 1740 max words, 150 samples - at ./dataset/gen-word-1740-count.jsonl\n", - "Generated JSONL file with - 2535 max words, 125 samples - at ./dataset/gen-word-2535-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max words - at ./dataset/shuffle-word-3475-count.jsonl\n", - "Generated a single JSONL file with 711 samples (100 token repeat) - 315 max words - at ./dataset/shuffle-word-315-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3420 max words - at ./dataset/shuffle-word-3420-count.jsonl\n", - "Generated JSONL file with - 2240 max words, 125 samples - at ./dataset/gen-word-2240-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n", - "Generated JSONL file with - 1620 max words, 150 samples - at ./dataset/gen-word-1620-count.jsonl\n", - "Generated JSONL file with - 1490 max words, 150 samples - at ./dataset/gen-word-1490-count.jsonl\n", - "Generated JSONL file with - 3995 max words, 100 samples - at ./dataset/gen-word-3995-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3645 max words - at ./dataset/shuffle-word-3645-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n", - "Generated a single JSONL file with 117 samples (100 token repeat) - 2625 max words - at ./dataset/shuffle-word-2625-count.jsonl\n", - "Generated JSONL file with - 2790 max words, 125 samples - at ./dataset/gen-word-2790-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", - "Generated JSONL file with - 1565 max words, 150 samples - at ./dataset/gen-word-1565-count.jsonl\n", - "Generated JSONL file with - 1240 max words, 150 samples - at ./dataset/gen-word-1240-count.jsonl\n", - "Generated JSONL file with - 2165 max words, 125 samples - at ./dataset/gen-word-2165-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3235 max words - at ./dataset/shuffle-word-3235-count.jsonl\n", - "Generated a single JSONL file with 149 samples (100 token repeat) - 2525 max words - at ./dataset/shuffle-word-2525-count.jsonl\n", - "Generated JSONL file with - 3945 max words, 100 samples - at ./dataset/gen-word-3945-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3955 max words - at ./dataset/shuffle-word-3955-count.jsonl\n", - "Generated a single 
JSONL file with 149 samples (100 token repeat) - 2535 max words - at ./dataset/shuffle-word-2535-count.jsonl\n", - "Generated JSONL file with - 2575 max words, 125 samples - at ./dataset/gen-word-2575-count.jsonl\n", - "Generated JSONL file with - 1120 max words, 150 samples - at ./dataset/gen-word-1120-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1045 max words - at ./dataset/shuffle-word-1045-count.jsonl\n", - "Generated JSONL file with - 1190 max words, 150 samples - at ./dataset/gen-word-1190-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1465 max words - at ./dataset/shuffle-word-1465-count.jsonl\n", - "Generated JSONL file with - 1145 max words, 150 samples - at ./dataset/gen-word-1145-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n", - "Generated JSONL file with - 2445 max words, 125 samples - at ./dataset/gen-word-2445-count.jsonl\n", - "Generated JSONL file with - 1735 max words, 150 samples - at ./dataset/gen-word-1735-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3815 max words - at ./dataset/shuffle-word-3815-count.jsonl\n", - "Generated JSONL file with - 1035 max words, 150 samples - at ./dataset/gen-word-1035-count.jsonl\n", - "Generated JSONL file with - 1725 max words, 150 samples - at ./dataset/gen-word-1725-count.jsonl\n", - "Generated JSONL file with - 2285 max words, 125 samples - at ./dataset/gen-word-2285-count.jsonl\n", - "Generated a single JSONL file with 295 samples (100 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", - "Generated JSONL file with - 1635 max words, 150 samples - at ./dataset/gen-word-1635-count.jsonl\n", - "Generated JSONL file with - 3980 max words, 100 samples - at ./dataset/gen-word-3980-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1855 max words - at ./dataset/shuffle-word-1855-count.jsonl\n", - "Generated a single JSONL file with 5873 samples (100 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", - "Generated JSONL file with - 3555 max words, 100 samples - at ./dataset/gen-word-3555-count.jsonl\n", - "Generated JSONL file with - 2435 max words, 125 samples - at ./dataset/gen-word-2435-count.jsonl\n", - "Generated JSONL file with - 3960 max words, 100 samples - at ./dataset/gen-word-3960-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2795 max words - at ./dataset/shuffle-word-2795-count.jsonl\n", - "Generated JSONL file with - 1820 max words, 150 samples - at ./dataset/gen-word-1820-count.jsonl\n", - "Generated JSONL file with - 1545 max words, 150 samples - at ./dataset/gen-word-1545-count.jsonl\n", - "Generated JSONL file with - 1125 max words, 150 samples - at ./dataset/gen-word-1125-count.jsonl\n", - "Generated a single JSONL file with 205 samples (100 token repeat) - 1305 max words - at ./dataset/shuffle-word-1305-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n", - "Generated a single JSONL file with 1011 samples (100 
token repeat) - 215 max words - at ./dataset/shuffle-word-215-count.jsonl\n", - "Generated JSONL file with - 1730 max words, 150 samples - at ./dataset/gen-word-1730-count.jsonl\n", - "Generated a single JSONL file with 202 samples (100 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", - "Generated JSONL file with - 1715 max words, 150 samples - at ./dataset/gen-word-1715-count.jsonl\n", - "Generated JSONL file with - 1805 max words, 150 samples - at ./dataset/gen-word-1805-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1095 max words - at ./dataset/shuffle-word-1095-count.jsonl\n", - "Generated JSONL file with - 1195 max words, 150 samples - at ./dataset/gen-word-1195-count.jsonl\n", - "Generated JSONL file with - 1315 max words, 150 samples - at ./dataset/gen-word-1315-count.jsonl\n", - "Generated JSONL file with - 2440 max words, 125 samples - at ./dataset/gen-word-2440-count.jsonl\n", - "Generated JSONL file with - 3985 max words, 100 samples - at ./dataset/gen-word-3985-count.jsonl\n", - "Generated a single JSONL file with 588 samples (100 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", - "Generated JSONL file with - 2505 max words, 125 samples - at ./dataset/gen-word-2505-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n", - "Generated a single JSONL file with 259 samples (100 token repeat) - 1265 max words - at ./dataset/shuffle-word-1265-count.jsonl\n", + "Generated JSONL file with - 3505 max words, 100 samples - at ./dataset/gen-word-3505-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3610 max words - at ./dataset/shuffle-word-3610-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3665 max words - at ./dataset/shuffle-word-3665-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3720 max words - at ./dataset/shuffle-word-3720-count.jsonl\n", + "Generated JSONL file with - 3845 max words, 100 samples - at ./dataset/gen-word-3845-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3865 max words - at ./dataset/shuffle-word-3865-count.jsonl\n", + "Generated a single JSONL file with 125 samples (100 token repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3410 max words - at ./dataset/shuffle-word-3410-count.jsonl\n", + "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", + "Generated a single JSONL file with 152 samples (100 token repeat) - 2575 max words - at ./dataset/shuffle-word-2575-count.jsonl\n", + "Generated JSONL file with - 3620 max words, 100 samples - at ./dataset/gen-word-3620-count.jsonl\n", + "Generated JSONL file with - 3390 max words, 100 samples - at ./dataset/gen-word-3390-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4580 max words - at ./dataset/shuffle-word-4580-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3810 max words - at ./dataset/shuffle-word-3810-count.jsonlGenerated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at 
./dataset/shuffle-word-3650-count.jsonl\n",
+ "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n",
+ "Generated JSONL file with - 3515 max words, 100 samples - at ./dataset/gen-word-3515-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3435 max words - at ./dataset/shuffle-word-3435-count.jsonl\n",
+ "Generated JSONL file with - 3790 max words, 100 samples - at ./dataset/gen-word-3790-count.jsonl\n",
+ "Generated JSONL file with - 3725 max words, 100 samples - at ./dataset/gen-word-3725-count.jsonl\n",
+ "Generated a single JSONL file with 103 samples (100 token repeat) - 2715 max words - at ./dataset/shuffle-word-2715-count.jsonl\n",
+ "Generated JSONL file with - 3675 max words, 100 samples - at ./dataset/gen-word-3675-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3690 max words - at ./dataset/shuffle-word-3690-count.jsonl\n",
+ "Generated JSONL file with - 3650 max words, 100 samples - at ./dataset/gen-word-3650-count.jsonl\n",
+ "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonl\n",
+ "Generated JSONL file with - 3520 max words, 100 samples - at ./dataset/gen-word-3520-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3415 max words - at ./dataset/shuffle-word-3415-count.jsonl\n",
+ "Generated a single JSONL file with 149 samples (100 token repeat) - 2515 max words - at ./dataset/shuffle-word-2515-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3970 max words - at ./dataset/shuffle-word-3970-count.jsonl\n",
+ "Generated JSONL file with - 3770 max words, 100 samples - at ./dataset/gen-word-3770-count.jsonl\n",
+ "Generated JSONL file with - 3480 max words, 100 samples - at ./dataset/gen-word-3480-count.jsonl\n",
+ "Generated a single JSONL file with 65288 samples (500 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n",
+ "Generated JSONL file with - 3685 max words, 100 samples - at ./dataset/gen-word-3685-count.jsonl\n",
+ "Generated JSONL file with - 3760 max words, 100 samples - at ./dataset/gen-word-3760-count.jsonl\n",
+ "Generated a single JSONL file with 148 samples (100 token repeat) - 2560 max words - at ./dataset/shuffle-word-2560-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4385 max words - at ./dataset/shuffle-word-4385-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4440 max words - at ./dataset/shuffle-word-4440-count.jsonl\n",
+ "Generated JSONL file with - 3755 max words, 100 samples - at ./dataset/gen-word-3755-count.jsonl\n",
+ "Generated JSONL file with - 3585 max words, 100 samples - at ./dataset/gen-word-3585-count.jsonl\n",
+ "Generated JSONL file with - 4580 max words, 100 samples - at ./dataset/gen-word-4580-count.jsonl\n",
+ "Generated a single JSONL file with 123 samples (100 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3430 max words - at ./dataset/shuffle-word-3430-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4930 max words - at ./dataset/shuffle-word-4930-count.jsonl\n",
+ "Generated JSONL file with - 3720 max words, 100 samples - at ./dataset/gen-word-3720-count.jsonl\n",
+ "Generated JSONL file with - 3690 max words, 100 samples - at 
./dataset/gen-word-3690-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3805 max words - at ./dataset/shuffle-word-3805-count.jsonl\n", + "Generated JSONL file with - 3830 max words, 100 samples - at ./dataset/gen-word-3830-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4590 max words - at ./dataset/shuffle-word-4590-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4560 max words - at ./dataset/shuffle-word-4560-count.jsonl\n", + "Generated JSONL file with - 3570 max words, 100 samples - at ./dataset/gen-word-3570-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4675 max words - at ./dataset/shuffle-word-4675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5035 max words - at ./dataset/shuffle-word-5035-count.jsonl\n", + "Generated JSONL file with - 3855 max words, 100 samples - at ./dataset/gen-word-3855-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5110 max words - at ./dataset/shuffle-word-5110-count.jsonl\n", + "Generated JSONL file with - 3530 max words, 100 samples - at ./dataset/gen-word-3530-count.jsonl\n", + "Generated JSONL file with - 4545 max words, 100 samples - at ./dataset/gen-word-4545-count.jsonl\n", + "Generated JSONL file with - 3840 max words, 100 samples - at ./dataset/gen-word-3840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5185 max words - at ./dataset/shuffle-word-5185-count.jsonl\n", + "Generated JSONL file with - 2505 max words, 100 samples - at ./dataset/gen-word-2505-count.jsonl\n", + "Generated a single JSONL file with 199 samples (100 token repeat) - 2400 max words - at ./dataset/shuffle-word-2400-count.jsonl\n", + "Generated a single JSONL file with 121 samples (100 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n", + "Generated JSONL file with - 3810 max words, 100 samples - at ./dataset/gen-word-3810-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n", + "Generated JSONL file with - 4560 max words, 100 samples - at ./dataset/gen-word-4560-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3830 max words - at ./dataset/shuffle-word-3830-count.jsonl\n", + "Generated JSONL file with - 3595 max words, 100 samples - at ./dataset/gen-word-3595-count.jsonl\n", + "Generated a single JSONL file with 101 samples (100 token repeat) - 2725 max words - at ./dataset/shuffle-word-2725-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3910 max words - at ./dataset/shuffle-word-3910-count.jsonl\n", - "Generated JSONL file with - 1345 max words, 150 samples - at ./dataset/gen-word-1345-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n", - "Generated JSONL file with - 2800 max words, 125 samples - at ./dataset/gen-word-2800-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4870 max words - at 
./dataset/shuffle-word-4870-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4355 max words - at ./dataset/shuffle-word-4355-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4700 max words - at ./dataset/shuffle-word-4700-count.jsonl\n", "Generated JSONL file with - 3615 max words, 100 samples - at ./dataset/gen-word-3615-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3960 max words - at ./dataset/shuffle-word-3960-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", - "Generated a single JSONL file with 105 samples (100 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n", - "Generated a single JSONL file with 197 samples (100 token repeat) - 2330 max words - at ./dataset/shuffle-word-2330-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2005 max words - at ./dataset/shuffle-word-2005-count.jsonl\n", - "Generated JSONL file with - 1600 max words, 150 samples - at ./dataset/gen-word-1600-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", - "Generated JSONL file with - 2060 max words, 125 samples - at ./dataset/gen-word-2060-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2285 max words - at ./dataset/shuffle-word-2285-count.jsonl\n", - "Generated JSONL file with - 1680 max words, 150 samples - at ./dataset/gen-word-1680-count.jsonl\n", - "Generated JSONL file with - 1230 max words, 150 samples - at ./dataset/gen-word-1230-count.jsonl\n", - "Generated JSONL file with - 3490 max words, 100 samples - at ./dataset/gen-word-3490-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4635 max words - at ./dataset/shuffle-word-4635-count.jsonl\n", + "Generated JSONL file with - 4875 max words, 100 samples - at ./dataset/gen-word-4875-count.jsonl\n", + "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5010 max words - at ./dataset/shuffle-word-5010-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5050 max words - at ./dataset/shuffle-word-5050-count.jsonl\n", + "Generated JSONL file with - 5035 max words, 100 samples - at ./dataset/gen-word-5035-count.jsonl\n", + "Generated a single JSONL file with 105 samples (100 token repeat) - 2755 max words - at ./dataset/shuffle-word-2755-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4485 max words - at ./dataset/shuffle-word-4485-count.jsonl\n", + "Generated JSONL file with - 4605 max words, 100 samples - at ./dataset/gen-word-4605-count.jsonl\n", + "Generated a single JSONL file with 152 samples (100 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n", + "Generated a single JSONL file with 121 samples (100 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5115 max words - at ./dataset/shuffle-word-5115-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5120 max words - at ./dataset/shuffle-word-5120-count.jsonl\n", + "Generated JSONL file with - 5130 max 
words, 100 samples - at ./dataset/gen-word-5130-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5055 max words - at ./dataset/shuffle-word-5055-count.jsonl\n",
+ "Generated JSONL file with - 5185 max words, 100 samples - at ./dataset/gen-word-5185-count.jsonl\n",
+ "Generated JSONL file with - 5125 max words, 100 samples - at ./dataset/gen-word-5125-count.jsonl\n",
+ "Generated a single JSONL file with 102 samples (100 token repeat) - 2745 max words - at ./dataset/shuffle-word-2745-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5060 max words - at ./dataset/shuffle-word-5060-count.jsonl\n",
+ "Generated a single JSONL file with 118 samples (100 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4850 max words - at ./dataset/shuffle-word-4850-count.jsonl\n",
+ "Generated JSONL file with - 3660 max words, 100 samples - at ./dataset/gen-word-3660-count.jsonl\n",
+ "Generated a single JSONL file with 114 samples (100 token repeat) - 2695 max words - at ./dataset/shuffle-word-2695-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3860 max words - at ./dataset/shuffle-word-3860-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4715 max words - at ./dataset/shuffle-word-4715-count.jsonl\n",
+ "Generated a single JSONL file with 149 samples (100 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n",
+ "Generated a single JSONL file with 104 samples (100 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n",
+ "Generated a single JSONL file with 121 samples (100 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n",
+ "Generated JSONL file with - 3670 max words, 100 samples - at ./dataset/gen-word-3670-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5075 max words - at ./dataset/shuffle-word-5075-count.jsonl\n",
+ "Generated JSONL file with - 2675 max words, 100 samples - at ./dataset/gen-word-2675-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4375 max words - at ./dataset/shuffle-word-4375-count.jsonl\n",
+ "Generated JSONL file with - 2490 max words, 100 samples - at ./dataset/gen-word-2490-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3310 max words - at ./dataset/shuffle-word-3310-count.jsonl\n",
+ "Generated JSONL file with - 4600 max words, 100 samples - at ./dataset/gen-word-4600-count.jsonl\n",
+ "Generated JSONL file with - 2520 max words, 100 samples - at ./dataset/gen-word-2520-count.jsonl\n",
+ "Generated JSONL file with - 4385 max words, 100 samples - at ./dataset/gen-word-4385-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (100 token repeat) - 2520 max words - at ./dataset/shuffle-word-2520-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5240 max words - at ./dataset/shuffle-word-5240-count.jsonl\n",
+ "Generated JSONL file with - 4445 max words, 100 samples - at ./dataset/gen-word-4445-count.jsonl\n",
+ "Generated JSONL file with - 4565 max words, 100 samples - at ./dataset/gen-word-4565-count.jsonl\n",
+ "Generated JSONL file with - 3695 max words, 100 
samples - at ./dataset/gen-word-3695-count.jsonl\n",
+ "Generated JSONL file with - 2685 max words, 100 samples - at ./dataset/gen-word-2685-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4605 max words - at ./dataset/shuffle-word-4605-count.jsonl\n",
+ "Generated a single JSONL file with 121 samples (100 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n",
+ "Generated JSONL file with - 3645 max words, 100 samples - at ./dataset/gen-word-3645-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5125 max words - at ./dataset/shuffle-word-5125-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3840 max words - at ./dataset/shuffle-word-3840-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5565 max words - at ./dataset/shuffle-word-5565-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5290 max words - at ./dataset/shuffle-word-5290-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4410 max words - at ./dataset/shuffle-word-4410-count.jsonl\n",
+ "Generated a single JSONL file with 88977 samples (500 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5340 max words - at ./dataset/shuffle-word-5340-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4610 max words - at ./dataset/shuffle-word-4610-count.jsonl\n",
+ "Generated a single JSONL file with 104 samples (100 token repeat) - 2765 max words - at ./dataset/shuffle-word-2765-count.jsonl\n",
+ "Generated a single JSONL file with 102 samples (100 token repeat) - 2795 max words - at ./dataset/shuffle-word-2795-count.jsonl\n",
+ "Generated JSONL file with - 2690 max words, 100 samples - at ./dataset/gen-word-2690-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5180 max words - at ./dataset/shuffle-word-5180-count.jsonl\n",
+ "Generated JSONL file with - 3815 max words, 100 samples - at ./dataset/gen-word-3815-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5020 max words - at ./dataset/shuffle-word-5020-count.jsonl\n",
+ "Generated JSONL file with - 5110 max words, 100 samples - at ./dataset/gen-word-5110-count.jsonl\n",
+ "Generated a single JSONL file with 52995 samples (500 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n",
+ "Generated a single JSONL file with 101 samples (100 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3015 max words - at ./dataset/shuffle-word-3015-count.jsonl\n",
+ "Generated JSONL file with - 3270 max words, 100 samples - at ./dataset/gen-word-3270-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4475 max words - at ./dataset/shuffle-word-4475-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n",
+ "Generated JSONL file with - 3225 max words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n",
+ "Generated JSONL file with - 2740 max words, 100 samples - at ./dataset/gen-word-2740-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3095 max words - at 
./dataset/shuffle-word-3095-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4725 max words - at ./dataset/shuffle-word-4725-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4670 max words - at ./dataset/shuffle-word-4670-count.jsonl\n",
+ "Generated a single JSONL file with 105 samples (100 token repeat) - 2785 max words - at ./dataset/shuffle-word-2785-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5445 max words - at ./dataset/shuffle-word-5445-count.jsonl\n",
+ "Generated a single JSONL file with 102 samples (100 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3040 max words - at ./dataset/shuffle-word-3040-count.jsonl\n",
+ "Generated JSONL file with - 2670 max words, 100 samples - at ./dataset/gen-word-2670-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4615 max words - at ./dataset/shuffle-word-4615-count.jsonl\n",
+ "Generated JSONL file with - 4935 max words, 100 samples - at ./dataset/gen-word-4935-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3895 max words - at ./dataset/shuffle-word-3895-count.jsonl\n",
+ "Generated JSONL file with - 3445 max words, 100 samples - at ./dataset/gen-word-3445-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4035 max words - at ./dataset/shuffle-word-4035-count.jsonl\n",
+ "Generated a single JSONL file with 102 samples (100 token repeat) - 2775 max words - at ./dataset/shuffle-word-2775-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4885 max words - at ./dataset/shuffle-word-4885-count.jsonl\n",
+ "Generated a single JSONL file with 101 samples (100 token repeat) - 2705 max words - at ./dataset/shuffle-word-2705-count.jsonl\n",
+ "Generated JSONL file with - 3260 max words, 100 samples - at ./dataset/gen-word-3260-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4200 max words - at ./dataset/shuffle-word-4200-count.jsonl\n",
+ "Generated JSONL file with - 4645 max words, 100 samples - at ./dataset/gen-word-4645-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5085 max words - at ./dataset/shuffle-word-5085-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5470 max words - at ./dataset/shuffle-word-5470-count.jsonl\n",
+ "Generated JSONL file with - 4615 max words, 100 samples - at ./dataset/gen-word-4615-count.jsonl\n",
+ "Generated a single JSONL file with 105 samples (100 token repeat) - 2770 max words - at ./dataset/shuffle-word-2770-count.jsonl\n",
+ "Generated JSONL file with - 4040 max words, 100 samples - at ./dataset/gen-word-4040-count.jsonl\n",
+ "Generated JSONL file with - 5885 max words, 100 samples - at ./dataset/gen-word-5885-count.jsonl\n",
+ "Generated JSONL file with - 3430 max words, 100 samples - at ./dataset/gen-word-3430-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 2845 max words - at ./dataset/shuffle-word-2845-count.jsonl\n",
+ "Generated JSONL file with - 2370 max words, 100 samples - at ./dataset/gen-word-2370-count.jsonl\n",
+ "Generated a single JSONL file 
with 100 samples (100 token repeat) - 4310 max words - at ./dataset/shuffle-word-4310-count.jsonl\n",
+ "Generated JSONL file with - 4335 max words, 100 samples - at ./dataset/gen-word-4335-count.jsonl\n",
+ "Generated JSONL file with - 5895 max words, 100 samples - at ./dataset/gen-word-5895-count.jsonl\n",
+ "Generated JSONL file with - 5120 max words, 100 samples - at ./dataset/gen-word-5120-count.jsonl\n",
+ "Generated JSONL file with - 2525 max words, 100 samples - at ./dataset/gen-word-2525-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3260 max words - at ./dataset/shuffle-word-3260-count.jsonl\n",
+ "Generated JSONL file with - 2500 max words, 100 samples - at ./dataset/gen-word-2500-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5015 max words - at ./dataset/shuffle-word-5015-count.jsonl\n",
+ "Generated JSONL file with - 5010 max words, 100 samples - at ./dataset/gen-word-5010-count.jsonl\n",
+ "Generated JSONL file with - 5060 max words, 100 samples - at ./dataset/gen-word-5060-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5880 max words - at ./dataset/shuffle-word-5880-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4695 max words - at ./dataset/shuffle-word-4695-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4680 max words - at ./dataset/shuffle-word-4680-count.jsonl\n",
+ "Generated JSONL file with - 4365 max words, 100 samples - at ./dataset/gen-word-4365-count.jsonl\n",
+ "Generated JSONL file with - 2630 max words, 100 samples - at ./dataset/gen-word-2630-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4790 max words - at ./dataset/shuffle-word-4790-count.jsonl\n",
+ "Generated JSONL file with - 5055 max words, 100 samples - at ./dataset/gen-word-5055-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5375 max words - at ./dataset/shuffle-word-5375-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4080 max words - at ./dataset/shuffle-word-4080-count.jsonl\n",
+ "Generated a single JSONL file with 43603 samples (500 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n",
+ "Generated JSONL file with - 5040 max words, 100 samples - at ./dataset/gen-word-5040-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4780 max words - at ./dataset/shuffle-word-4780-count.jsonl\n",
+ "Generated JSONL file with - 3420 max words, 100 samples - at ./dataset/gen-word-3420-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5760 max words - at ./dataset/shuffle-word-5760-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5845 max words - at ./dataset/shuffle-word-5845-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4975 max words - at ./dataset/shuffle-word-4975-count.jsonl\n",
+ "Generated JSONL file with - 4675 max words, 100 samples - at ./dataset/gen-word-4675-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5970 max words - at ./dataset/shuffle-word-5970-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 
max words - at ./dataset/shuffle-word-3350-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n",
+ "Generated JSONL file with - 4665 max words, 100 samples - at ./dataset/gen-word-4665-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4705 max words - at ./dataset/shuffle-word-4705-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3890 max words - at ./dataset/shuffle-word-3890-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4545 max words - at ./dataset/shuffle-word-4545-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3130 max words - at ./dataset/shuffle-word-3130-count.jsonl\n",
+ "Generated JSONL file with - 5155 max words, 100 samples - at ./dataset/gen-word-5155-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4210 max words - at ./dataset/shuffle-word-4210-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4400 max words - at ./dataset/shuffle-word-4400-count.jsonl\n",
+ "Generated JSONL file with - 4155 max words, 100 samples - at ./dataset/gen-word-4155-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3360 max words - at ./dataset/shuffle-word-3360-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4250 max words - at ./dataset/shuffle-word-4250-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5560 max words - at ./dataset/shuffle-word-5560-count.jsonl\n",
+ "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n",
+ "Generated JSONL file with - 5190 max words, 100 samples - at ./dataset/gen-word-5190-count.jsonl\n",
+ "Generated JSONL file with - 4840 max words, 100 samples - at ./dataset/gen-word-4840-count.jsonl\n",
+ "Generated JSONL file with - 3000 max words, 100 samples - at ./dataset/gen-word-3000-count.jsonl\n",
+ "Generated JSONL file with - 4070 max words, 100 samples - at ./dataset/gen-word-4070-count.jsonl\n",
+ "Generated JSONL file with - 4420 max words, 100 samples - at ./dataset/gen-word-4420-count.jsonl\n",
+ "Generated JSONL file with - 4515 max words, 100 samples - at ./dataset/gen-word-4515-count.jsonl\n",
+ "Generated JSONL file with - 4860 max words, 100 samples - at ./dataset/gen-word-4860-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3185 max words - at ./dataset/shuffle-word-3185-count.jsonl\n",
+ "Generated JSONL file with - 3415 max words, 100 samples - at ./dataset/gen-word-3415-count.jsonl\n",
+ "Generated a single JSONL file with 102 samples (100 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n",
+ "Generated JSONL file with - 2915 max words, 100 samples - at ./dataset/gen-word-2915-count.jsonl\n",
+ "Generated JSONL file with - 4995 max words, 100 samples - at ./dataset/gen-word-4995-count.jsonl\n",
+ "Generated JSONL file with - 2950 max words, 100 samples - at ./dataset/gen-word-2950-count.jsonl\n",
+ "Generated JSONL file with - 5115 max words, 100 samples - at ./dataset/gen-word-5115-count.jsonl\n",
+ "Generated JSONL file with - 4405 max words, 100 samples - at 
./dataset/gen-word-4405-count.jsonl\n",
+ "Generated JSONL file with - 2660 max words, 100 samples - at ./dataset/gen-word-2660-count.jsonl\n",
+ "Generated JSONL file with - 4660 max words, 100 samples - at ./dataset/gen-word-4660-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4110 max words - at ./dataset/shuffle-word-4110-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 2935 max words - at ./dataset/shuffle-word-2935-count.jsonl\n",
+ "Generated JSONL file with - 2775 max words, 100 samples - at ./dataset/gen-word-2775-count.jsonl\n",
+ "Generated JSONL file with - 2645 max words, 100 samples - at ./dataset/gen-word-2645-count.jsonl\n",
+ "Generated JSONL file with - 5470 max words, 100 samples - at ./dataset/gen-word-5470-count.jsonl\n",
+ "Generated JSONL file with - 2900 max words, 100 samples - at ./dataset/gen-word-2900-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5250 max words - at ./dataset/shuffle-word-5250-count.jsonl\n",
+ "Generated JSONL file with - 2725 max words, 100 samples - at ./dataset/gen-word-2725-count.jsonl\n",
+ "Generated JSONL file with - 4190 max words, 100 samples - at ./dataset/gen-word-4190-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3065 max words - at ./dataset/shuffle-word-3065-count.jsonl\n",
+ "Generated JSONL file with - 3115 max words, 100 samples - at ./dataset/gen-word-3115-count.jsonl\n",
+ "Generated a single JSONL file with 130339 samples (500 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4785 max words - at ./dataset/shuffle-word-4785-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4795 max words - at ./dataset/shuffle-word-4795-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5005 max words - at ./dataset/shuffle-word-5005-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5490 max words - at 
./dataset/shuffle-word-5490-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4415 max words - at ./dataset/shuffle-word-4415-count.jsonl\n", + "Generated JSONL file with - 5450 max words, 100 samples - at ./dataset/gen-word-5450-count.jsonl\n", + "Generated JSONL file with - 4260 max words, 100 samples - at ./dataset/gen-word-4260-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3220 max words - at ./dataset/shuffle-word-3220-count.jsonl\n", - "Generated JSONL file with - 1975 max words, 150 samples - at ./dataset/gen-word-1975-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3790 max words - at ./dataset/shuffle-word-3790-count.jsonl\n", - "Generated JSONL file with - 2810 max words, 125 samples - at ./dataset/gen-word-2810-count.jsonl\n", - "Generated JSONL file with - 1325 max words, 150 samples - at ./dataset/gen-word-1325-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2975 max words - at ./dataset/shuffle-word-2975-count.jsonl\n", - "Generated JSONL file with - 2015 max words, 125 samples - at ./dataset/gen-word-2015-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2890 max words - at ./dataset/shuffle-word-2890-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1635 max words - at ./dataset/shuffle-word-1635-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3460 max words - at ./dataset/shuffle-word-3460-count.jsonl\n", - "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n", - "Generated JSONL file with - 2145 max words, 125 samples - at ./dataset/gen-word-2145-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n", - "Generated JSONL file with - 2030 max words, 125 samples - at ./dataset/gen-word-2030-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", + "Generated JSONL file with - 2865 max words, 100 samples - at ./dataset/gen-word-2865-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5045 max words - at ./dataset/shuffle-word-5045-count.jsonl\n", + "Generated JSONL file with - 4535 max words, 100 samples - at ./dataset/gen-word-4535-count.jsonl\n", + "Generated JSONL file with - 2855 max words, 100 samples - at ./dataset/gen-word-2855-count.jsonl\n", + "Generated JSONL file with - 2455 max words, 100 samples - at ./dataset/gen-word-2455-count.jsonl\n", + "Generated JSONL file with - 5080 max words, 100 samples - at ./dataset/gen-word-5080-count.jsonl\n", + "Generated JSONL file with - 4095 max words, 100 samples - at ./dataset/gen-word-4095-count.jsonl\n", + "Generated a single JSONL file with 103 samples (100 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 2915 max words - at ./dataset/shuffle-word-2915-count.jsonl\n", - "Generated a single JSONL file with 184 samples (100 token 
repeat) - 2415 max words - at ./dataset/shuffle-word-2415-count.jsonl\n", - "Generated a single JSONL file with 117 samples (100 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n", - "Generated JSONL file with - 2265 max words, 125 samples - at ./dataset/gen-word-2265-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2985 max words - at ./dataset/shuffle-word-2985-count.jsonl\n", - "Generated JSONL file with - 2725 max words, 125 samples - at ./dataset/gen-word-2725-count.jsonl\n", - "Generated JSONL file with - 1955 max words, 150 samples - at ./dataset/gen-word-1955-count.jsonl\n", - "Generated JSONL file with - 2270 max words, 125 samples - at ./dataset/gen-word-2270-count.jsonl\n", - "Generated JSONL file with - 1755 max words, 150 samples - at ./dataset/gen-word-1755-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2055 max words - at ./dataset/shuffle-word-2055-count.jsonl\n", + "Generated JSONL file with - 2870 max words, 100 samples - at ./dataset/gen-word-2870-count.jsonl\n", + "Generated a single JSONL file with 198 samples (100 token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n", + "Generated JSONL file with - 2715 max words, 100 samples - at ./dataset/gen-word-2715-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", + "Generated JSONL file with - 3215 max words, 100 samples - at ./dataset/gen-word-3215-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5665 max words - at ./dataset/shuffle-word-5665-count.jsonl\n", + "Generated JSONL file with - 4695 max words, 100 samples - at ./dataset/gen-word-4695-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2925 max words - at ./dataset/shuffle-word-2925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5000 max words - at ./dataset/shuffle-word-5000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3085 max words - at ./dataset/shuffle-word-3085-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2815 max words - at ./dataset/shuffle-word-2815-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3855 max words - at ./dataset/shuffle-word-3855-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4865 max words - at ./dataset/shuffle-word-4865-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2835 max words - at ./dataset/shuffle-word-2835-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3365 max words - at ./dataset/shuffle-word-3365-count.jsonl\n", + "Generated JSONL file with - 2875 max words, 100 samples - at ./dataset/gen-word-2875-count.jsonl\n", + "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", + "Generated JSONL file with - 5525 max words, 100 samples - at ./dataset/gen-word-5525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2945 max words - at ./dataset/shuffle-word-2945-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4920 max words - at ./dataset/shuffle-word-4920-count.jsonl\n", + "Generated JSONL file with - 2940 max words, 100 samples - at 
./dataset/gen-word-2940-count.jsonl\n", + "Generated JSONL file with - 4650 max words, 100 samples - at ./dataset/gen-word-4650-count.jsonl\n", + "Generated JSONL file with - 2720 max words, 100 samples - at ./dataset/gen-word-2720-count.jsonl\n", + "Generated JSONL file with - 2555 max words, 100 samples - at ./dataset/gen-word-2555-count.jsonl\n", + "Generated JSONL file with - 2755 max words, 100 samples - at ./dataset/gen-word-2755-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3230 max words - at ./dataset/shuffle-word-3230-count.jsonl\n", + "Generated JSONL file with - 2985 max words, 100 samples - at ./dataset/gen-word-2985-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4750 max words - at ./dataset/shuffle-word-4750-count.jsonl\n", + "Generated JSONL file with - 5565 max words, 100 samples - at ./dataset/gen-word-5565-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4090 max words - at ./dataset/shuffle-word-4090-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4395 max words - at ./dataset/shuffle-word-4395-count.jsonl\n", + "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5380 max words - at ./dataset/shuffle-word-5380-count.jsonl\n", + "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5130 max words - at ./dataset/shuffle-word-5130-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", + "Generated JSONL file with - 2640 max words, 100 samples - at ./dataset/gen-word-2640-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3285 max words - at ./dataset/shuffle-word-3285-count.jsonl\n", + "Generated JSONL file with - 4920 max words, 100 samples - at ./dataset/gen-word-4920-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5965 max words - at ./dataset/shuffle-word-5965-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3885 max words - at ./dataset/shuffle-word-3885-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3870 max words - at ./dataset/shuffle-word-3870-count.jsonl\n", - "Generated JSONL file with - 2130 max words, 125 samples - at ./dataset/gen-word-2130-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3140 max words - at ./dataset/shuffle-word-3140-count.jsonl\n", - "Generated JSONL file with - 2125 max words, 125 samples - at ./dataset/gen-word-2125-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2865 max words - at ./dataset/shuffle-word-2865-count.jsonl\n", - "Generated JSONL file with - 2050 max words, 125 samples - at ./dataset/gen-word-2050-count.jsonl\n", - "Generated JSONL file with - 1840 max words, 150 samples - at ./dataset/gen-word-1840-count.jsonl\n", - "Generated JSONL file with - 3780 max words, 100 samples - at ./dataset/gen-word-3780-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n", - "Generated JSONL file with - 3955 max words, 100 samples - at 
./dataset/gen-word-3955-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n", - "Generated JSONL file with - 2450 max words, 125 samples - at ./dataset/gen-word-2450-count.jsonl\n", - "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1705 max words - at ./dataset/shuffle-word-1705-count.jsonl\n", - "Generated a single JSONL file with 125 samples (100 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2115 max words - at ./dataset/shuffle-word-2115-count.jsonl\n", - "Generated a single JSONL file with 246 samples (100 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", - "Generated JSONL file with - 2065 max words, 125 samples - at ./dataset/gen-word-2065-count.jsonl\n", - "Generated JSONL file with - 2910 max words, 125 samples - at ./dataset/gen-word-2910-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1775 max words - at ./dataset/shuffle-word-1775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3030 max words - at ./dataset/shuffle-word-3030-count.jsonl\n", + "Generated a single JSONL file with 123 samples (100 token repeat) - 2685 max words - at ./dataset/shuffle-word-2685-count.jsonl\n", + "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4075 max words - at ./dataset/shuffle-word-4075-count.jsonl\n", + "Generated JSONL file with - 5570 max words, 100 samples - at ./dataset/gen-word-5570-count.jsonl\n", + "Generated JSONL file with - 5625 max words, 100 samples - at ./dataset/gen-word-5625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3940 max words - at ./dataset/shuffle-word-3940-count.jsonl\n", + "Generated JSONL file with - 4020 max words, 100 samples - at ./dataset/gen-word-4020-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4330 max words - at ./dataset/shuffle-word-4330-count.jsonl\n", + "Generated JSONL file with - 3230 max words, 100 samples - at ./dataset/gen-word-3230-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4345 max words - at ./dataset/shuffle-word-4345-count.jsonl\n", + "Generated JSONL file with - 5140 max words, 100 samples - at ./dataset/gen-word-5140-count.jsonl\n", + "Generated JSONL file with - 5850 max words, 100 samples - at ./dataset/gen-word-5850-count.jsonl\n", + "Generated JSONL file with - 2780 max words, 100 samples - at ./dataset/gen-word-2780-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3915 max words - at ./dataset/shuffle-word-3915-count.jsonl\n", + "Generated JSONL file with - 4910 max words, 100 samples - at ./dataset/gen-word-4910-count.jsonl\n", + "Generated JSONL file with - 2880 max words, 100 samples - at ./dataset/gen-word-2880-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3945 max words - at ./dataset/shuffle-word-3945-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5385 max words - at ./dataset/shuffle-word-5385-count.jsonlGenerated JSONL file with - 5100 max 
words, 100 samples - at ./dataset/gen-word-5100-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5605 max words - at ./dataset/shuffle-word-5605-count.jsonl\n",
+ "Generated JSONL file with - 2955 max words, 100 samples - at ./dataset/gen-word-2955-count.jsonl\n",
+ "Generated JSONL file with - 5270 max words, 100 samples - at ./dataset/gen-word-5270-count.jsonl\n",
+ "Generated JSONL file with - 4490 max words, 100 samples - at ./dataset/gen-word-4490-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3055 max words - at ./dataset/shuffle-word-3055-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4085 max words - at ./dataset/shuffle-word-4085-count.jsonl\n",
+ "Generated JSONL file with - 5315 max words, 100 samples - at ./dataset/gen-word-5315-count.jsonl\n",
+ "Generated JSONL file with - 2680 max words, 100 samples - at ./dataset/gen-word-2680-count.jsonl\n",
+ "Generated JSONL file with - 5880 max words, 100 samples - at ./dataset/gen-word-5880-count.jsonl\n",
+ "Generated JSONL file with - 5965 max words, 100 samples - at ./dataset/gen-word-5965-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4015 max words - at ./dataset/shuffle-word-4015-count.jsonl\n",
+ "Generated JSONL file with - 4930 max words, 100 samples - at ./dataset/gen-word-4930-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4445 max words - at ./dataset/shuffle-word-4445-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4165 max words - at ./dataset/shuffle-word-4165-count.jsonl\n",
+ "Generated JSONL file with - 3005 max words, 100 samples - at ./dataset/gen-word-3005-count.jsonl\n",
+ "Generated JSONL file with - 4195 max words, 100 samples - at ./dataset/gen-word-4195-count.jsonl\n",
+ "Generated JSONL file with - 4025 max words, 100 samples - at ./dataset/gen-word-4025-count.jsonl\n",
+ "Generated JSONL file with - 5240 max words, 100 samples - at ./dataset/gen-word-5240-count.jsonl\n",
+ "Generated JSONL file with - 2635 max words, 100 samples - at ./dataset/gen-word-2635-count.jsonl\n",
+ "Generated JSONL file with - 5335 max words, 100 samples - at ./dataset/gen-word-5335-count.jsonl\n",
+ "Generated JSONL file with - 2480 max words, 100 samples - at ./dataset/gen-word-2480-count.jsonl\n",
+ "Generated JSONL file with - 5600 max words, 100 samples - at ./dataset/gen-word-5600-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4240 max words - at ./dataset/shuffle-word-4240-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5550 max words - at ./dataset/shuffle-word-5550-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4290 max words - at ./dataset/shuffle-word-4290-count.jsonl\n",
+ "Generated JSONL file with - 4395 max words, 100 samples - at ./dataset/gen-word-4395-count.jsonl\n",
+ "Generated JSONL file with - 4375 max words, 100 samples - at ./dataset/gen-word-4375-count.jsonl\n",
+ "Generated JSONL file with - 2825 max words, 100 samples - at ./dataset/gen-word-2825-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4225 max words - at ./dataset/shuffle-word-4225-count.jsonl\n",
+ "Generated JSONL 
file with - 4370 max words, 100 samples - at ./dataset/gen-word-4370-count.jsonl\n", + "Generated JSONL file with - 4900 max words, 100 samples - at ./dataset/gen-word-4900-count.jsonl\n", + "Generated JSONL file with - 4285 max words, 100 samples - at ./dataset/gen-word-4285-count.jsonl\n", + "Generated JSONL file with - 5615 max words, 100 samples - at ./dataset/gen-word-5615-count.jsonl\n", + "Generated JSONL file with - 2785 max words, 100 samples - at ./dataset/gen-word-2785-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", + "Generated JSONL file with - 4755 max words, 100 samples - at ./dataset/gen-word-4755-count.jsonl\n", + "Generated JSONL file with - 5845 max words, 100 samples - at ./dataset/gen-word-5845-count.jsonl\n", + "Generated JSONL file with - 5855 max words, 100 samples - at ./dataset/gen-word-5855-count.jsonl\n", + "Generated JSONL file with - 5775 max words, 100 samples - at ./dataset/gen-word-5775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4360 max words - at ./dataset/shuffle-word-4360-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2985 max words - at ./dataset/shuffle-word-2985-count.jsonl\n", + "Generated JSONL file with - 2885 max words, 100 samples - at ./dataset/gen-word-2885-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4135 max words - at ./dataset/shuffle-word-4135-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5210 max words - at ./dataset/shuffle-word-5210-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5355 max words - at ./dataset/shuffle-word-5355-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4030 max words - at ./dataset/shuffle-word-4030-count.jsonl\n", + "Generated JSONL file with - 4400 max words, 100 samples - at ./dataset/gen-word-4400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5670 max words - at ./dataset/shuffle-word-5670-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", + "Generated JSONL file with - 4200 max words, 100 samples - at ./dataset/gen-word-4200-count.jsonl\n", + "Generated JSONL file with - 4125 max words, 100 samples - at ./dataset/gen-word-4125-count.jsonl\n", + "Generated JSONL file with - 5015 max words, 100 samples - at ./dataset/gen-word-5015-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3380 max words - at ./dataset/shuffle-word-3380-count.jsonl\n", + "Generated JSONL file with - 4205 max words, 100 samples - at ./dataset/gen-word-4205-count.jsonl\n", + "Generated JSONL file with - 3890 max words, 100 samples - at ./dataset/gen-word-3890-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4100 max words - at ./dataset/shuffle-word-4100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3215 max words - at ./dataset/shuffle-word-3215-count.jsonl\n", + "Generated JSONL 
file with - 3120 max words, 100 samples - at ./dataset/gen-word-3120-count.jsonl\n", + "Generated JSONL file with - 3805 max words, 100 samples - at ./dataset/gen-word-3805-count.jsonl\n", + "Generated JSONL file with - 3055 max words, 100 samples - at ./dataset/gen-word-3055-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3395 max words - at ./dataset/shuffle-word-3395-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3845 max words - at ./dataset/shuffle-word-3845-count.jsonl\n", - "Generated JSONL file with - 1405 max words, 150 samples - at ./dataset/gen-word-1405-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1955 max words - at ./dataset/shuffle-word-1955-count.jsonl\n", - "Generated a single JSONL file with 205 samples (100 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", - "Generated a single JSONL file with 117 samples (100 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1875 max words - at ./dataset/shuffle-word-1875-count.jsonl\n", - "Generated JSONL file with - 2215 max words, 125 samples - at ./dataset/gen-word-2215-count.jsonl\n", - "Generated JSONL file with - 3880 max words, 100 samples - at ./dataset/gen-word-3880-count.jsonl\n", - "Generated JSONL file with - 2460 max words, 125 samples - at ./dataset/gen-word-2460-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2045 max words - at ./dataset/shuffle-word-2045-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2995 max words - at ./dataset/shuffle-word-2995-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4245 max words - at ./dataset/shuffle-word-4245-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2805 max words - at ./dataset/shuffle-word-2805-count.jsonl\n", + "Generated JSONL file with - 3355 max words, 100 samples - at ./dataset/gen-word-3355-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2895 max words - at ./dataset/shuffle-word-2895-count.jsonl\n", + "Generated JSONL file with - 3180 max words, 100 samples - at ./dataset/gen-word-3180-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2905 max words - at ./dataset/shuffle-word-2905-count.jsonl\n", + "Generated JSONL file with - 3040 max words, 100 samples - at ./dataset/gen-word-3040-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4235 max words - at ./dataset/shuffle-word-4235-count.jsonl\n", + "Generated JSONL file with - 3185 max words, 100 samples - at ./dataset/gen-word-3185-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4020 max words - at ./dataset/shuffle-word-4020-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4490 max words - at ./dataset/shuffle-word-4490-count.jsonl\n", + "Generated a single JSONL file with 101 samples (100 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n", + 
"Generated a single JSONL file with 100 samples (100 token repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2855 max words - at ./dataset/shuffle-word-2855-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2825 max words - at ./dataset/shuffle-word-2825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5985 max words - at ./dataset/shuffle-word-5985-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", + "Generated JSONL file with - 3070 max words, 100 samples - at ./dataset/gen-word-3070-count.jsonl\n", + "Generated JSONL file with - 4950 max words, 100 samples - at ./dataset/gen-word-4950-count.jsonl\n", + "Generated JSONL file with - 4120 max words, 100 samples - at ./dataset/gen-word-4120-count.jsonl\n", + "Generated JSONL file with - 3195 max words, 100 samples - at ./dataset/gen-word-3195-count.jsonl\n", + "Generated JSONL file with - 4030 max words, 100 samples - at ./dataset/gen-word-4030-count.jsonl\n", + "Generated JSONL file with - 2790 max words, 100 samples - at ./dataset/gen-word-2790-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3035 max words - at ./dataset/shuffle-word-3035-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4380 max words - at ./dataset/shuffle-word-4380-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5365 max words - at ./dataset/shuffle-word-5365-count.jsonl\n", + "Generated JSONL file with - 3315 max words, 100 samples - at ./dataset/gen-word-3315-count.jsonl\n", + "Generated JSONL file with - 3290 max words, 100 samples - at ./dataset/gen-word-3290-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5200 max words - at ./dataset/shuffle-word-5200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4190 max words - at ./dataset/shuffle-word-4190-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5305 max words - at ./dataset/shuffle-word-5305-count.jsonl\n", + "Generated JSONL file with - 2700 max words, 100 samples - at ./dataset/gen-word-2700-count.jsonl\n", + "Generated JSONL file with - 5780 max words, 100 samples - at ./dataset/gen-word-5780-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3140 max words - at ./dataset/shuffle-word-3140-count.jsonl\n", + "Generated JSONL file with - 5180 max words, 100 samples - at ./dataset/gen-word-5180-count.jsonl\n", + "Generated JSONL file with - 2850 max words, 100 samples - at ./dataset/gen-word-2850-count.jsonl\n", + "Generated JSONL file with - 4590 max words, 100 samples - at ./dataset/gen-word-4590-count.jsonl\n", + "Generated JSONL file with - 5955 max words, 100 samples - at ./dataset/gen-word-5955-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5835 max words - at ./dataset/shuffle-word-5835-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3280 max words - at ./dataset/shuffle-word-3280-count.jsonl\n", + "Generated JSONL file with - 4505 max words, 100 samples - at ./dataset/gen-word-4505-count.jsonl\n", + "Generated JSONL file with - 3135 max words, 100 samples - at ./dataset/gen-word-3135-count.jsonl\n", + 
"Generated a single JSONL file with 100 samples (100 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4540 max words - at ./dataset/shuffle-word-4540-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4125 max words - at ./dataset/shuffle-word-4125-count.jsonl\n", + "Generated JSONL file with - 5320 max words, 100 samples - at ./dataset/gen-word-5320-count.jsonl\n", + "Generated JSONL file with - 3920 max words, 100 samples - at ./dataset/gen-word-3920-count.jsonl\n", + "Generated JSONL file with - 5300 max words, 100 samples - at ./dataset/gen-word-5300-count.jsonl\n", + "Generated JSONL file with - 4940 max words, 100 samples - at ./dataset/gen-word-4940-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5785 max words - at ./dataset/shuffle-word-5785-count.jsonl\n", + "Generated JSONL file with - 4990 max words, 100 samples - at ./dataset/gen-word-4990-count.jsonl\n", + "Generated JSONL file with - 3360 max words, 100 samples - at ./dataset/gen-word-3360-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3070 max words - at ./dataset/shuffle-word-3070-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3235 max words - at ./dataset/shuffle-word-3235-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4155 max words - at ./dataset/shuffle-word-4155-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4045 max words - at ./dataset/shuffle-word-4045-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3265 max words - at ./dataset/shuffle-word-3265-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3290 max words - at ./dataset/shuffle-word-3290-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2975 max words - at ./dataset/shuffle-word-2975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4140 max words - at ./dataset/shuffle-word-4140-count.jsonl\n", + "Generated JSONL file with - 4245 max words, 100 samples - at ./dataset/gen-word-4245-count.jsonl\n", + "Generated JSONL file with - 2800 max words, 100 samples - at ./dataset/gen-word-2800-count.jsonl\n", + "Generated JSONL file with - 4250 max words, 100 samples - at ./dataset/gen-word-4250-count.jsonl\n", + "Generated JSONL file with - 3860 max words, 100 samples - at ./dataset/gen-word-3860-count.jsonl\n", + "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", + "Generated JSONL file with - 2980 max words, 100 samples - at ./dataset/gen-word-2980-count.jsonl\n", + "Generated JSONL file with - 3410 max words, 100 samples - at ./dataset/gen-word-3410-count.jsonl\n", + "Generated JSONL file with - 2710 max words, 100 samples - at ./dataset/gen-word-2710-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3990 max words - at ./dataset/shuffle-word-3990-count.jsonl\n", + "Generated JSONL file with - 5690 max words, 100 samples - at ./dataset/gen-word-5690-count.jsonl\n", + "Generated JSONL file with - 3970 max words, 100 samples - at ./dataset/gen-word-3970-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4025 max words - at ./dataset/shuffle-word-4025-count.jsonl\n", + 
"Generated a single JSONL file with 100 samples (100 token repeat) - 5750 max words - at ./dataset/shuffle-word-5750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4575 max words - at ./dataset/shuffle-word-4575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5410 max words - at ./dataset/shuffle-word-5410-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4595 max words - at ./dataset/shuffle-word-4595-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5100 max words - at ./dataset/shuffle-word-5100-count.jsonl\n", + "Generated JSONL file with - 2860 max words, 100 samples - at ./dataset/gen-word-2860-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3020 max words - at ./dataset/shuffle-word-3020-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4945 max words - at ./dataset/shuffle-word-4945-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5790 max words - at ./dataset/shuffle-word-5790-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5690 max words - at ./dataset/shuffle-word-5690-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4010 max words - at ./dataset/shuffle-word-4010-count.jsonl\n", + "Generated JSONL file with - 3130 max words, 100 samples - at ./dataset/gen-word-3130-count.jsonl\n", + "Generated JSONL file with - 5555 max words, 100 samples - at ./dataset/gen-word-5555-count.jsonl\n", "Generated JSONL file with - 3140 max words, 100 samples - at ./dataset/gen-word-3140-count.jsonl\n", - "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n", - "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", - "Generated JSONL file with - 2720 max words, 125 samples - at ./dataset/gen-word-2720-count.jsonl\n", - "Generated JSONL file with - 2665 max words, 125 samples - at ./dataset/gen-word-2665-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3940 max words - at ./dataset/shuffle-word-3940-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3345 max words - at ./dataset/shuffle-word-3345-count.jsonl\n", - "Generated a single JSONL file with 203 samples (100 token repeat) - 1375 max words - at ./dataset/shuffle-word-1375-count.jsonl\n", - "Generated a single JSONL file with 300 samples (100 token repeat) - 995 max words - at ./dataset/shuffle-word-995-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2215 max words - at ./dataset/shuffle-word-2215-count.jsonl\n", - "Generated a single JSONL file with 154 samples (100 token repeat) - 2585 max words - at ./dataset/shuffle-word-2585-count.jsonl\n", - "Generated JSONL file with - 2660 max words, 125 samples - at ./dataset/gen-word-2660-count.jsonl\n", - "Generated JSONL file with - 2520 max words, 125 samples - at ./dataset/gen-word-2520-count.jsonl\n", - "Generated JSONL file with - 2610 max words, 125 samples - at ./dataset/gen-word-2610-count.jsonl\n", - "Generated JSONL file with - 2220 max words, 125 samples - at ./dataset/gen-word-2220-count.jsonl\n", - "Generated JSONL file with - 2870 max words, 125 samples - at ./dataset/gen-word-2870-count.jsonl\n", - "Generated JSONL file with - 2525 max words, 
125 samples - at ./dataset/gen-word-2525-count.jsonl\n", - "Generated JSONL file with - 2385 max words, 125 samples - at ./dataset/gen-word-2385-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n", - "Generated JSONL file with - 2250 max words, 125 samples - at ./dataset/gen-word-2250-count.jsonl\n", + "Generated JSONL file with - 4350 max words, 100 samples - at ./dataset/gen-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4570 max words - at ./dataset/shuffle-word-4570-count.jsonl\n", + "Generated JSONL file with - 2895 max words, 100 samples - at ./dataset/gen-word-2895-count.jsonl\n", + "Generated JSONL file with - 5005 max words, 100 samples - at ./dataset/gen-word-5005-count.jsonl\n", + "Generated JSONL file with - 3465 max words, 100 samples - at ./dataset/gen-word-3465-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5105 max words - at ./dataset/shuffle-word-5105-count.jsonl\n", + "Generated JSONL file with - 2765 max words, 100 samples - at ./dataset/gen-word-2765-count.jsonl\n", + "Generated JSONL file with - 2745 max words, 100 samples - at ./dataset/gen-word-2745-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5465 max words - at ./dataset/shuffle-word-5465-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3010 max words - at ./dataset/shuffle-word-3010-count.jsonl\n", + "Generated JSONL file with - 3900 max words, 100 samples - at ./dataset/gen-word-3900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4195 max words - at ./dataset/shuffle-word-4195-count.jsonl\n", + "Generated JSONL file with - 5305 max words, 100 samples - at ./dataset/gen-word-5305-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3815 max words - at ./dataset/shuffle-word-3815-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5095 max words - at ./dataset/shuffle-word-5095-count.jsonl\n", + "Generated JSONL file with - 2815 max words, 100 samples - at ./dataset/gen-word-2815-count.jsonl\n", + "Generated JSONL file with - 4525 max words, 100 samples - at ./dataset/gen-word-4525-count.jsonl\n", + "Generated JSONL file with - 3395 max words, 100 samples - at ./dataset/gen-word-3395-count.jsonl\n", + "Generated JSONL file with - 2730 max words, 100 samples - at ./dataset/gen-word-2730-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5215 max words - at ./dataset/shuffle-word-5215-count.jsonl\n", + "Generated JSONL file with - 2820 max words, 100 samples - at ./dataset/gen-word-2820-count.jsonl\n", + "Generated a single JSONL file with 18868 samples (500 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", + "Generated JSONL file with - 3010 max words, 100 samples - at ./dataset/gen-word-3010-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4565 max words - at ./dataset/shuffle-word-4565-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4305 max words - at ./dataset/shuffle-word-4305-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", + "Generated JSONL file with - 4415 max words, 100 samples - at 
./dataset/gen-word-4415-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5225 max words - at ./dataset/shuffle-word-5225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5610 max words - at ./dataset/shuffle-word-5610-count.jsonl\n", + "Generated JSONL file with - 4225 max words, 100 samples - at ./dataset/gen-word-4225-count.jsonl\n", + "Generated JSONL file with - 4075 max words, 100 samples - at ./dataset/gen-word-4075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", + "Generated JSONL file with - 2760 max words, 100 samples - at ./dataset/gen-word-2760-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5930 max words - at ./dataset/shuffle-word-5930-count.jsonl\n", + "Generated JSONL file with - 5200 max words, 100 samples - at ./dataset/gen-word-5200-count.jsonl\n", + "Generated JSONL file with - 4270 max words, 100 samples - at ./dataset/gen-word-4270-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4460 max words - at ./dataset/shuffle-word-4460-count.jsonl\n", + "Generated JSONL file with - 5540 max words, 100 samples - at ./dataset/gen-word-5540-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5870 max words - at ./dataset/shuffle-word-5870-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5980 max words - at ./dataset/shuffle-word-5980-count.jsonl\n", + "Generated JSONL file with - 4355 max words, 100 samples - at ./dataset/gen-word-4355-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5710 max words - at ./dataset/shuffle-word-5710-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3920 max words - at ./dataset/shuffle-word-3920-count.jsonl\n", + "Generated JSONL file with - 3915 max words, 100 samples - at ./dataset/gen-word-3915-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", + "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5705 max words - at ./dataset/shuffle-word-5705-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3340 max words - at ./dataset/shuffle-word-3340-count.jsonl\n", + "Generated JSONL file with - 3145 max words, 100 samples - at ./dataset/gen-word-3145-count.jsonl\n", + "Generated JSONL file with - 3870 max words, 100 samples - at ./dataset/gen-word-3870-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4910 max words - at ./dataset/shuffle-word-4910-count.jsonl\n", + "Generated JSONL file with - 2935 max words, 100 samples - at ./dataset/gen-word-2935-count.jsonl\n", + "Generated JSONL file with - 3990 max words, 100 samples - at ./dataset/gen-word-3990-count.jsonl\n", + "Generated JSONL file with - 3995 max words, 100 samples - at ./dataset/gen-word-3995-count.jsonl\n", + "Generated JSONL file with - 3455 max words, 100 samples - at ./dataset/gen-word-3455-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3255 max words - at ./dataset/shuffle-word-3255-count.jsonl\n", + "Generated JSONL file with - 4570 max words, 100 samples - at 
./dataset/gen-word-4570-count.jsonl\n", + "Generated JSONL file with - 5590 max words, 100 samples - at ./dataset/gen-word-5590-count.jsonl\n", + "Generated JSONL file with - 5770 max words, 100 samples - at ./dataset/gen-word-5770-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5245 max words - at ./dataset/shuffle-word-5245-count.jsonl\n", + "Generated JSONL file with - 5640 max words, 100 samples - at ./dataset/gen-word-5640-count.jsonl\n", + "Generated JSONL file with - 3965 max words, 100 samples - at ./dataset/gen-word-3965-count.jsonl\n", + "Generated JSONL file with - 3435 max words, 100 samples - at ./dataset/gen-word-3435-count.jsonl\n", + "Generated JSONL file with - 3090 max words, 100 samples - at ./dataset/gen-word-3090-count.jsonl\n", + "Generated JSONL file with - 2830 max words, 100 samples - at ./dataset/gen-word-2830-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4585 max words - at ./dataset/shuffle-word-4585-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5175 max words - at ./dataset/shuffle-word-5175-count.jsonl\n", + "Generated JSONL file with - 5400 max words, 100 samples - at ./dataset/gen-word-5400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3135 max words - at ./dataset/shuffle-word-3135-count.jsonl\n", + "Generated a single JSONL file with 14732 samples (500 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3470 max words - at ./dataset/shuffle-word-3470-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4300 max words - at ./dataset/shuffle-word-4300-count.jsonl\n", + "Generated JSONL file with - 3110 max words, 100 samples - at ./dataset/gen-word-3110-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5860 max words - at ./dataset/shuffle-word-5860-count.jsonl\n", + "Generated JSONL file with - 3880 max words, 100 samples - at ./dataset/gen-word-3880-count.jsonl\n", + "Generated JSONL file with - 4105 max words, 100 samples - at ./dataset/gen-word-4105-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5350 max words - at ./dataset/shuffle-word-5350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", + "Generated JSONL file with - 2845 max words, 100 samples - at ./dataset/gen-word-2845-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3880 max words - at ./dataset/shuffle-word-3880-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3330 max words - at ./dataset/shuffle-word-3330-count.jsonl\n", + "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4965 max words - at ./dataset/shuffle-word-4965-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3905 max words - at ./dataset/shuffle-word-3905-count.jsonl\n", + "Generated JSONL file with - 3345 max words, 100 samples - at ./dataset/gen-word-3345-count.jsonl\n", + "Generated JSONL file with - 2810 max words, 100 samples - at ./dataset/gen-word-2810-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token 
repeat) - 5040 max words - at ./dataset/shuffle-word-5040-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5765 max words - at ./dataset/shuffle-word-5765-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3195 max words - at ./dataset/shuffle-word-3195-count.jsonl\n", + "Generated JSONL file with - 2750 max words, 100 samples - at ./dataset/gen-word-2750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5030 max words - at ./dataset/shuffle-word-5030-count.jsonl\n", + "Generated JSONL file with - 2805 max words, 100 samples - at ./dataset/gen-word-2805-count.jsonl\n", + "Generated JSONL file with - 4630 max words, 100 samples - at ./dataset/gen-word-4630-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5140 max words - at ./dataset/shuffle-word-5140-count.jsonl\n", + "Generated JSONL file with - 2840 max words, 100 samples - at ./dataset/gen-word-2840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4735 max words - at ./dataset/shuffle-word-4735-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3115 max words - at ./dataset/shuffle-word-3115-count.jsonl\n", + "Generated JSONL file with - 3365 max words, 100 samples - at ./dataset/gen-word-3365-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5395 max words - at ./dataset/shuffle-word-5395-count.jsonl\n", + "Generated JSONL file with - 3210 max words, 100 samples - at ./dataset/gen-word-3210-count.jsonl\n", + "Generated JSONL file with - 3075 max words, 100 samples - at ./dataset/gen-word-3075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4855 max words - at ./dataset/shuffle-word-4855-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5155 max words - at ./dataset/shuffle-word-5155-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5695 max words - at ./dataset/shuffle-word-5695-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4710 max words - at ./dataset/shuffle-word-4710-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3315 max words - at ./dataset/shuffle-word-3315-count.jsonl\n", + "Generated JSONL file with - 4780 max words, 100 samples - at ./dataset/gen-word-4780-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5345 max words - at ./dataset/shuffle-word-5345-count.jsonl\n", + "Generated JSONL file with - 2835 max words, 100 samples - at ./dataset/gen-word-2835-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4430 max words - at ./dataset/shuffle-word-4430-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4455 max words - at ./dataset/shuffle-word-4455-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3390 max words - at ./dataset/shuffle-word-3390-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3820 max words - at ./dataset/shuffle-word-3820-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5735 max words - at ./dataset/shuffle-word-5735-count.jsonl\n", + "Generated JSONL file with - 3220 max words, 100 samples - at ./dataset/gen-word-3220-count.jsonl\n", + 
"Generated a single JSONL file with 100 samples (100 token repeat) - 4060 max words - at ./dataset/shuffle-word-4060-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3465 max words - at ./dataset/shuffle-word-3465-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5505 max words - at ./dataset/shuffle-word-5505-count.jsonl\n", + "Generated JSONL file with - 3960 max words, 100 samples - at ./dataset/gen-word-3960-count.jsonl\n", + "Generated JSONL file with - 5680 max words, 100 samples - at ./dataset/gen-word-5680-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4435 max words - at ./dataset/shuffle-word-4435-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4835 max words - at ./dataset/shuffle-word-4835-count.jsonl\n", + "Generated JSONL file with - 4325 max words, 100 samples - at ./dataset/gen-word-4325-count.jsonl\n", + "Generated JSONL file with - 3190 max words, 100 samples - at ./dataset/gen-word-3190-count.jsonl\n", + "Generated JSONL file with - 4275 max words, 100 samples - at ./dataset/gen-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5935 max words - at ./dataset/shuffle-word-5935-count.jsonl\n", + "Generated JSONL file with - 2925 max words, 100 samples - at ./dataset/gen-word-2925-count.jsonl\n", + "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3455 max words - at ./dataset/shuffle-word-3455-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5230 max words - at ./dataset/shuffle-word-5230-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3160 max words - at ./dataset/shuffle-word-3160-count.jsonl\n", + "Generated JSONL file with - 4585 max words, 100 samples - at ./dataset/gen-word-4585-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4170 max words - at ./dataset/shuffle-word-4170-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4655 max words - at ./dataset/shuffle-word-4655-count.jsonl\n", + "Generated JSONL file with - 4640 max words, 100 samples - at ./dataset/gen-word-4640-count.jsonl\n", + "Generated JSONL file with - 5345 max words, 100 samples - at ./dataset/gen-word-5345-count.jsonl\n", + "Generated JSONL file with - 2910 max words, 100 samples - at ./dataset/gen-word-2910-count.jsonl\n", + "Generated JSONL file with - 4475 max words, 100 samples - at ./dataset/gen-word-4475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4180 max words - at ./dataset/shuffle-word-4180-count.jsonl\n", + "Generated JSONL file with - 3940 max words, 100 samples - at ./dataset/gen-word-3940-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5325 max words - at ./dataset/shuffle-word-5325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3165 max words - at ./dataset/shuffle-word-3165-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4230 max words - at ./dataset/shuffle-word-4230-count.jsonl\n", + "Generated JSONL file with - 2995 max words, 100 samples - at ./dataset/gen-word-2995-count.jsonl\n", + "Generated JSONL file with - 4295 max words, 100 samples - at 
./dataset/gen-word-4295-count.jsonl\n", + "Generated JSONL file with - 4085 max words, 100 samples - at ./dataset/gen-word-4085-count.jsonl\n", + "Generated JSONL file with - 2945 max words, 100 samples - at ./dataset/gen-word-2945-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5825 max words - at ./dataset/shuffle-word-5825-count.jsonl\n", + "Generated JSONL file with - 3945 max words, 100 samples - at ./dataset/gen-word-3945-count.jsonl\n", + "Generated JSONL file with - 4235 max words, 100 samples - at ./dataset/gen-word-4235-count.jsonl\n", + "Generated JSONL file with - 3240 max words, 100 samples - at ./dataset/gen-word-3240-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4810 max words - at ./dataset/shuffle-word-4810-count.jsonl\n", + "Generated JSONL file with - 5195 max words, 100 samples - at ./dataset/gen-word-5195-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3955 max words - at ./dataset/shuffle-word-3955-count.jsonl\n", + "Generated JSONL file with - 3865 max words, 100 samples - at ./dataset/gen-word-3865-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3320 max words - at ./dataset/shuffle-word-3320-count.jsonl\n", + "Generated JSONL file with - 3035 max words, 100 samples - at ./dataset/gen-word-3035-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4665 max words - at ./dataset/shuffle-word-4665-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4650 max words - at ./dataset/shuffle-word-4650-count.jsonl\n", + "Generated JSONL file with - 4280 max words, 100 samples - at ./dataset/gen-word-4280-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3965 max words - at ./dataset/shuffle-word-3965-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4185 max words - at ./dataset/shuffle-word-4185-count.jsonl\n", + "Generated JSONL file with - 5960 max words, 100 samples - at ./dataset/gen-word-5960-count.jsonl\n", + "Generated JSONL file with - 5090 max words, 100 samples - at ./dataset/gen-word-5090-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3295 max words - at ./dataset/shuffle-word-3295-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4500 max words - at ./dataset/shuffle-word-4500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4840 max words - at ./dataset/shuffle-word-4840-count.jsonl\n", + "Generated JSONL file with - 3905 max words, 100 samples - at ./dataset/gen-word-3905-count.jsonl\n", + "Generated a single JSONL file with 20382 samples (500 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5065 max words - at ./dataset/shuffle-word-5065-count.jsonl\n", + "Generated JSONL file with - 3305 max words, 100 samples - at ./dataset/gen-word-3305-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5235 max words - at ./dataset/shuffle-word-5235-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3335 max words - at ./dataset/shuffle-word-3335-count.jsonl\n", + 
"Generated a single JSONL file with 100 samples (100 token repeat) - 4510 max words - at ./dataset/shuffle-word-4510-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4745 max words - at ./dataset/shuffle-word-4745-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4050 max words - at ./dataset/shuffle-word-4050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4065 max words - at ./dataset/shuffle-word-4065-count.jsonl\n", + "Generated JSONL file with - 3165 max words, 100 samples - at ./dataset/gen-word-3165-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5255 max words - at ./dataset/shuffle-word-5255-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5775 max words - at ./dataset/shuffle-word-5775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3210 max words - at ./dataset/shuffle-word-3210-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", + "Generated JSONL file with - 2990 max words, 100 samples - at ./dataset/gen-word-2990-count.jsonl\n", + "Generated JSONL file with - 4715 max words, 100 samples - at ./dataset/gen-word-4715-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3205 max words - at ./dataset/shuffle-word-3205-count.jsonl\n", - "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n", - "Generated JSONL file with - 3360 max words, 100 samples - at ./dataset/gen-word-3360-count.jsonl\n", - "Generated a single JSONL file with 187 samples (100 token repeat) - 2445 max words - at ./dataset/shuffle-word-2445-count.jsonl\n", - "Generated a single JSONL file with 152 samples (100 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n", - "Generated JSONL file with - 3340 max words, 100 samples - at ./dataset/gen-word-3340-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3105 max words - at ./dataset/shuffle-word-3105-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2065 max words - at ./dataset/shuffle-word-2065-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2775 max words - at ./dataset/shuffle-word-2775-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n", - "Generated JSONL file with - 2475 max words, 125 samples - at ./dataset/gen-word-2475-count.jsonl\n", - "Generated JSONL file with - 3170 max words, 100 samples - at ./dataset/gen-word-3170-count.jsonl\n", - "Generated JSONL file with - 3080 max words, 100 samples - at ./dataset/gen-word-3080-count.jsonl\n", - "Generated JSONL file with - 3105 max words, 100 samples - at ./dataset/gen-word-3105-count.jsonl\n", + "Generated JSONL file with - 3875 max words, 100 samples - at ./dataset/gen-word-3875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5495 max words - at 
./dataset/shuffle-word-5495-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5500 max words - at ./dataset/shuffle-word-5500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5660 max words - at ./dataset/shuffle-word-5660-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3995 max words - at ./dataset/shuffle-word-3995-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4150 max words - at ./dataset/shuffle-word-4150-count.jsonl\n", + "Generated JSONL file with - 4210 max words, 100 samples - at ./dataset/gen-word-4210-count.jsonl\n", + "Generated JSONL file with - 4905 max words, 100 samples - at ./dataset/gen-word-4905-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3935 max words - at ./dataset/shuffle-word-3935-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3920 max words - at ./dataset/shuffle-word-3920-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3030 max words - at ./dataset/shuffle-word-3030-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2195 max words - at ./dataset/shuffle-word-2195-count.jsonl\n", - "Generated a single JSONL file with 104 samples (100 token repeat) - 2735 max words - at ./dataset/shuffle-word-2735-count.jsonl\n", - "Generated JSONL file with - 3190 max words, 100 samples - at ./dataset/gen-word-3190-count.jsonl\n", + "Generated JSONL file with - 3160 max words, 100 samples - at ./dataset/gen-word-3160-count.jsonl\n", + "Generated JSONL file with - 3955 max words, 100 samples - at ./dataset/gen-word-3955-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5360 max words - at ./dataset/shuffle-word-5360-count.jsonl\n", + "Generated JSONL file with - 3835 max words, 100 samples - at ./dataset/gen-word-3835-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3270 max words - at ./dataset/shuffle-word-3270-count.jsonl\n", + "Generated JSONL file with - 4330 max words, 100 samples - at ./dataset/gen-word-4330-count.jsonl\n", + "Generated JSONL file with - 3385 max words, 100 samples - at ./dataset/gen-word-3385-count.jsonl\n", + "Generated JSONL file with - 5255 max words, 100 samples - at ./dataset/gen-word-5255-count.jsonl\n", + "Generated JSONL file with - 4300 max words, 100 samples - at ./dataset/gen-word-4300-count.jsonl\n", + "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5520 max words - at ./dataset/shuffle-word-5520-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5285 max words - at ./dataset/shuffle-word-5285-count.jsonl\n", + "Generated JSONL file with - 2970 max words, 100 samples - at ./dataset/gen-word-2970-count.jsonl\n", + "Generated JSONL file with - 5815 max words, 100 samples - at ./dataset/gen-word-5815-count.jsonl\n", + "Generated JSONL file with - 5230 max words, 100 samples - at ./dataset/gen-word-5230-count.jsonl\n", + "Generated JSONL file with - 3935 max words, 100 samples - at ./dataset/gen-word-3935-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token 
repeat) - 5805 max words - at ./dataset/shuffle-word-5805-count.jsonl\n", + "Generated JSONL file with - 4110 max words, 100 samples - at ./dataset/gen-word-4110-count.jsonl\n", + "Generated JSONL file with - 5705 max words, 100 samples - at ./dataset/gen-word-5705-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5205 max words - at ./dataset/shuffle-word-5205-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3460 max words - at ./dataset/shuffle-word-3460-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5570 max words - at ./dataset/shuffle-word-5570-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5475 max words - at ./dataset/shuffle-word-5475-count.jsonl\n", + "Generated JSONL file with - 3080 max words, 100 samples - at ./dataset/gen-word-3080-count.jsonl\n", + "Generated JSONL file with - 2905 max words, 100 samples - at ./dataset/gen-word-2905-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5905 max words - at ./dataset/shuffle-word-5905-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3370 max words - at ./dataset/shuffle-word-3370-count.jsonl\n", + "Generated JSONL file with - 5260 max words, 100 samples - at ./dataset/gen-word-5260-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5910 max words - at ./dataset/shuffle-word-5910-count.jsonl\n", + "Generated a single JSONL file with 21936 samples (500 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", + "Generated JSONL file with - 5580 max words, 100 samples - at ./dataset/gen-word-5580-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5555 max words - at ./dataset/shuffle-word-5555-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5945 max words - at ./dataset/shuffle-word-5945-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", + "Generated JSONL file with - 4005 max words, 100 samples - at ./dataset/gen-word-4005-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5390 max words - at ./dataset/shuffle-word-5390-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5795 max words - at ./dataset/shuffle-word-5795-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3445 max words - at ./dataset/shuffle-word-3445-count.jsonl\n", + "Generated JSONL file with - 2965 max words, 100 samples - at ./dataset/gen-word-2965-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5770 max words - at ./dataset/shuffle-word-5770-count.jsonl\n", + "Generated JSONL file with - 4960 max words, 100 samples - at ./dataset/gen-word-4960-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4765 max words - at ./dataset/shuffle-word-4765-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5430 max words - at ./dataset/shuffle-word-5430-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5315 max words - at 
./dataset/shuffle-word-5315-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3870 max words - at ./dataset/shuffle-word-3870-count.jsonl\n", + "Generated JSONL file with - 5395 max words, 100 samples - at ./dataset/gen-word-5395-count.jsonl\n", + "Generated JSONL file with - 5675 max words, 100 samples - at ./dataset/gen-word-5675-count.jsonl\n", + "Generated JSONL file with - 4610 max words, 100 samples - at ./dataset/gen-word-4610-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3850 max words - at ./dataset/shuffle-word-3850-count.jsonl\n", - "Generated JSONL file with - 3585 max words, 100 samples - at ./dataset/gen-word-3585-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3405 max words - at ./dataset/shuffle-word-3405-count.jsonl\n", + "Generated JSONL file with - 5970 max words, 100 samples - at ./dataset/gen-word-5970-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3440 max words - at ./dataset/shuffle-word-3440-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3960 max words - at ./dataset/shuffle-word-3960-count.jsonl\n", + "Generated JSONL file with - 5225 max words, 100 samples - at ./dataset/gen-word-5225-count.jsonl\n", + "Generated JSONL file with - 4765 max words, 100 samples - at ./dataset/gen-word-4765-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5950 max words - at ./dataset/shuffle-word-5950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4295 max words - at ./dataset/shuffle-word-4295-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5890 max words - at ./dataset/shuffle-word-5890-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4105 max words - at ./dataset/shuffle-word-4105-count.jsonl\n", + "Generated JSONL file with - 4835 max words, 100 samples - at ./dataset/gen-word-4835-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5400 max words - at ./dataset/shuffle-word-5400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words - at ./dataset/shuffle-word-3450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5310 max words - at ./dataset/shuffle-word-5310-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3420 max words - at ./dataset/shuffle-word-3420-count.jsonl\n", + "Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", + "Generated JSONL file with - 4700 max words, 100 samples - at ./dataset/gen-word-4700-count.jsonl\n", + "Generated JSONL file with - 4045 max words, 100 samples - at ./dataset/gen-word-4045-count.jsonl\n", + "Generated JSONL file with - 4800 max words, 100 samples - at ./dataset/gen-word-4800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", "Generated JSONL file with - 3125 max words, 100 samples - at ./dataset/gen-word-3125-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1425 max words - at ./dataset/shuffle-word-1425-count.jsonl\n", - "Generated JSONL file with - 2710 max words, 125 samples - at ./dataset/gen-word-2710-count.jsonl\n", - "Generated a single JSONL file 
with 100 samples (100 token repeat) - 3045 max words - at ./dataset/shuffle-word-3045-count.jsonl\n", - "Generated a single JSONL file with 186 samples (100 token repeat) - 2450 max words - at ./dataset/shuffle-word-2450-count.jsonl\n", - "Generated JSONL file with - 1970 max words, 150 samples - at ./dataset/gen-word-1970-count.jsonl\n", - "Generated JSONL file with - 2965 max words, 125 samples - at ./dataset/gen-word-2965-count.jsonl\n", - "Generated JSONL file with - 1950 max words, 150 samples - at ./dataset/gen-word-1950-count.jsonl\n", - "Generated JSONL file with - 2885 max words, 125 samples - at ./dataset/gen-word-2885-count.jsonl\n", - "Generated JSONL file with - 3920 max words, 100 samples - at ./dataset/gen-word-3920-count.jsonl\n", - "Generated JSONL file with - 2765 max words, 125 samples - at ./dataset/gen-word-2765-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2175 max words - at ./dataset/shuffle-word-2175-count.jsonl\n", - "Generated JSONL file with - 2780 max words, 125 samples - at ./dataset/gen-word-2780-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n", - "Generated JSONL file with - 3180 max words, 100 samples - at ./dataset/gen-word-3180-count.jsonl\n", - "Generated JSONL file with - 3135 max words, 100 samples - at ./dataset/gen-word-3135-count.jsonl\n", - "Generated JSONL file with - 2025 max words, 125 samples - at ./dataset/gen-word-2025-count.jsonl\n", - "Generated JSONL file with - 3875 max words, 100 samples - at ./dataset/gen-word-3875-count.jsonl\n", - "Generated JSONL file with - 2160 max words, 125 samples - at ./dataset/gen-word-2160-count.jsonl\n", - "Generated a single JSONL file with 157 samples (100 token repeat) - 2560 max words - at ./dataset/shuffle-word-2560-count.jsonl\n", - "Generated JSONL file with - 3755 max words, 100 samples - at ./dataset/gen-word-3755-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5330 max words - at ./dataset/shuffle-word-5330-count.jsonl\n", + "Generated JSONL file with - 4215 max words, 100 samples - at ./dataset/gen-word-4215-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3385 max words - at ./dataset/shuffle-word-3385-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", + "Generated JSONL file with - 5330 max words, 100 samples - at ./dataset/gen-word-5330-count.jsonl\n", + "Generated JSONL file with - 3105 max words, 100 samples - at ./dataset/gen-word-3105-count.jsonl\n", + "Generated JSONL file with - 5860 max words, 100 samples - at ./dataset/gen-word-5860-count.jsonl\n", + "Generated JSONL file with - 2960 max words, 100 samples - at ./dataset/gen-word-2960-count.jsonl\n", + "Generated JSONL file with - 5800 max words, 100 samples - at ./dataset/gen-word-5800-count.jsonl\n", + "Generated JSONL file with - 4975 max words, 100 samples - at ./dataset/gen-word-4975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5480 max words - at ./dataset/shuffle-word-5480-count.jsonl\n", + "Generated JSONL 
file with - 5940 max words, 100 samples - at ./dataset/gen-word-5940-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5425 max words - at ./dataset/shuffle-word-5425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4950 max words - at ./dataset/shuffle-word-4950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4095 max words - at ./dataset/shuffle-word-4095-count.jsonl\n", + "Generated JSONL file with - 3060 max words, 100 samples - at ./dataset/gen-word-3060-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3835 max words - at ./dataset/shuffle-word-3835-count.jsonl\n", + "Generated JSONL file with - 4240 max words, 100 samples - at ./dataset/gen-word-4240-count.jsonl\n", "Generated JSONL file with - 3020 max words, 100 samples - at ./dataset/gen-word-3020-count.jsonl\n", - "Generated JSONL file with - 2255 max words, 125 samples - at ./dataset/gen-word-2255-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1445 max words - at ./dataset/shuffle-word-1445-count.jsonl\n", - "Generated JSONL file with - 3000 max words, 125 samples - at ./dataset/gen-word-3000-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonl\n", - "Generated a single JSONL file with 117 samples (100 token repeat) - 2615 max words - at ./dataset/shuffle-word-2615-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3165 max words - at ./dataset/shuffle-word-3165-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3060 max words - at ./dataset/shuffle-word-3060-count.jsonl\n", - "Generated a single JSONL file with 103 samples (100 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n", - "Generated JSONL file with - 3370 max words, 100 samples - at ./dataset/gen-word-3370-count.jsonl\n", - "Generated JSONL file with - 2570 max words, 125 samples - at ./dataset/gen-word-2570-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2835 max words - at ./dataset/shuffle-word-2835-count.jsonl\n", - "Generated a single JSONL file with 161 samples (100 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n", - "Generated JSONL file with - 2565 max words, 125 samples - at ./dataset/gen-word-2565-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", - "Generated JSONL file with - 2970 max words, 125 samples - at ./dataset/gen-word-2970-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2245 max words - at ./dataset/shuffle-word-2245-count.jsonl\n", - "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", - "Generated JSONL file with - 3915 max words, 100 samples - at ./dataset/gen-word-3915-count.jsonl\n", - "Generated JSONL file with - 2995 max words, 125 samples - at ./dataset/gen-word-2995-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3210 max words - at ./dataset/shuffle-word-3210-count.jsonl\n", - "Generated JSONL file with - 2805 max words, 125 samples - at 
./dataset/gen-word-2805-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3305 max words - at ./dataset/shuffle-word-3305-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3495 max words - at ./dataset/shuffle-word-3495-count.jsonl\n", - "Generated JSONL file with - 985 max words, 150 samples - at ./dataset/gen-word-985-count.jsonl\n", - "Generated JSONL file with - 2495 max words, 125 samples - at ./dataset/gen-word-2495-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4285 max words - at ./dataset/shuffle-word-4285-count.jsonl\n", + "Generated JSONL file with - 3030 max words, 100 samples - at ./dataset/gen-word-3030-count.jsonl\n", + "Generated JSONL file with - 4670 max words, 100 samples - at ./dataset/gen-word-4670-count.jsonl\n", + "Generated JSONL file with - 4710 max words, 100 samples - at ./dataset/gen-word-4710-count.jsonl\n", + "Generated JSONL file with - 4510 max words, 100 samples - at ./dataset/gen-word-4510-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4770 max words - at ./dataset/shuffle-word-4770-count.jsonl\n", + "Generated a single JSONL file with 29393 samples (500 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", + "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", + "Generated JSONL file with - 4080 max words, 100 samples - at ./dataset/gen-word-4080-count.jsonl\n", + "Generated JSONL file with - 3310 max words, 100 samples - at ./dataset/gen-word-3310-count.jsonl\n", + "Generated JSONL file with - 3155 max words, 100 samples - at ./dataset/gen-word-3155-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4350 max words - at ./dataset/shuffle-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3985 max words - at ./dataset/shuffle-word-3985-count.jsonl\n", + "Generated JSONL file with - 5900 max words, 100 samples - at ./dataset/gen-word-5900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5895 max words - at ./dataset/shuffle-word-5895-count.jsonl\n", + "Generated JSONL file with - 4315 max words, 100 samples - at ./dataset/gen-word-4315-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4390 max words - at ./dataset/shuffle-word-4390-count.jsonl\n", + "Generated JSONL file with - 4035 max words, 100 samples - at ./dataset/gen-word-4035-count.jsonl\n", + "Generated JSONL file with - 5435 max words, 100 samples - at ./dataset/gen-word-5435-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4875 max words - at ./dataset/shuffle-word-4875-count.jsonl\n", + "Generated JSONL file with - 4455 max words, 100 samples - at ./dataset/gen-word-4455-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4860 max words - at ./dataset/shuffle-word-4860-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4820 max words - at ./dataset/shuffle-word-4820-count.jsonl\n", + "Generated JSONL file with - 3065 max words, 100 samples - at ./dataset/gen-word-3065-count.jsonl\n", + "Generated JSONL file with - 4230 max words, 100 samples - at ./dataset/gen-word-4230-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4465 max words - at 
./dataset/shuffle-word-4465-count.jsonl\n", + "Generated JSONL file with - 4390 max words, 100 samples - at ./dataset/gen-word-4390-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5975 max words - at ./dataset/shuffle-word-5975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4470 max words - at ./dataset/shuffle-word-4470-count.jsonl\n", + "Generated JSONL file with - 5380 max words, 100 samples - at ./dataset/gen-word-5380-count.jsonl\n", + "Generated JSONL file with - 3200 max words, 100 samples - at ./dataset/gen-word-3200-count.jsonl\n", + "Generated JSONL file with - 4500 max words, 100 samples - at ./dataset/gen-word-4500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4720 max words - at ./dataset/shuffle-word-4720-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5265 max words - at ./dataset/shuffle-word-5265-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4130 max words - at ./dataset/shuffle-word-4130-count.jsonl\n", "Generated JSONL file with - 3850 max words, 100 samples - at ./dataset/gen-word-3850-count.jsonl\n", + "Generated JSONL file with - 5980 max words, 100 samples - at ./dataset/gen-word-5980-count.jsonl\n", + "Generated JSONL file with - 4945 max words, 100 samples - at ./dataset/gen-word-4945-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4040 max words - at ./dataset/shuffle-word-4040-count.jsonl\n", + "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5540 max words - at ./dataset/shuffle-word-5540-count.jsonl\n", + "Generated a single JSONL file with 16520 samples (500 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", + "Generated JSONL file with - 5425 max words, 100 samples - at ./dataset/gen-word-5425-count.jsonl\n", + "Generated JSONL file with - 3925 max words, 100 samples - at ./dataset/gen-word-3925-count.jsonl\n", + "Generated JSONL file with - 3255 max words, 100 samples - at ./dataset/gen-word-3255-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4730 max words - at ./dataset/shuffle-word-4730-count.jsonl\n", + "Generated JSONL file with - 5950 max words, 100 samples - at ./dataset/gen-word-5950-count.jsonl\n", + "Generated JSONL file with - 5285 max words, 100 samples - at ./dataset/gen-word-5285-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4175 max words - at ./dataset/shuffle-word-4175-count.jsonl\n", + "Generated JSONL file with - 4485 max words, 100 samples - at ./dataset/gen-word-4485-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5275 max words - at ./dataset/shuffle-word-5275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4120 max words - at ./dataset/shuffle-word-4120-count.jsonl\n", + "Generated JSONL file with - 3245 max words, 100 samples - at ./dataset/gen-word-3245-count.jsonl\n", + "Generated JSONL file with - 5550 max words, 100 samples - at ./dataset/gen-word-5550-count.jsonl\n", + "Generated JSONL file with - 3985 max words, 100 samples - at ./dataset/gen-word-3985-count.jsonl\n", + "Generated JSONL file with - 3280 max words, 100 samples - at ./dataset/gen-word-3280-count.jsonl\n", + "Generated a single 
JSONL file with 100 samples (100 token repeat) - 4520 max words - at ./dataset/shuffle-word-4520-count.jsonl\n", + "Generated JSONL file with - 4015 max words, 100 samples - at ./dataset/gen-word-4015-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5090 max words - at ./dataset/shuffle-word-5090-count.jsonl\n", + "Generated JSONL file with - 4685 max words, 100 samples - at ./dataset/gen-word-4685-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5535 max words - at ./dataset/shuffle-word-5535-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5675 max words - at ./dataset/shuffle-word-5675-count.jsonl\n", + "Generated JSONL file with - 4100 max words, 100 samples - at ./dataset/gen-word-4100-count.jsonl\n", + "Generated JSONL file with - 5085 max words, 100 samples - at ./dataset/gen-word-5085-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4160 max words - at ./dataset/shuffle-word-4160-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5405 max words - at ./dataset/shuffle-word-5405-count.jsonl\n", + "Generated JSONL file with - 4140 max words, 100 samples - at ./dataset/gen-word-4140-count.jsonl\n", + "Generated JSONL file with - 4625 max words, 100 samples - at ./dataset/gen-word-4625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4005 max words - at ./dataset/shuffle-word-4005-count.jsonl\n", + "Generated JSONL file with - 3170 max words, 100 samples - at ./dataset/gen-word-3170-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5160 max words - at ./dataset/shuffle-word-5160-count.jsonl\n", + "Generated JSONL file with - 5020 max words, 100 samples - at ./dataset/gen-word-5020-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4370 max words - at ./dataset/shuffle-word-4370-count.jsonl\n", + "Generated JSONL file with - 4010 max words, 100 samples - at ./dataset/gen-word-4010-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4055 max words - at ./dataset/shuffle-word-4055-count.jsonl\n", + "Generated JSONL file with - 3330 max words, 100 samples - at ./dataset/gen-word-3330-count.jsonl\n", + "Generated JSONL file with - 3370 max words, 100 samples - at ./dataset/gen-word-3370-count.jsonl\n", + "Generated JSONL file with - 4130 max words, 100 samples - at ./dataset/gen-word-4130-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4630 max words - at ./dataset/shuffle-word-4630-count.jsonl\n", "Generated JSONL file with - 3205 max words, 100 samples - at ./dataset/gen-word-3205-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n", - "Generated JSONL file with - 1705 max words, 150 samples - at ./dataset/gen-word-1705-count.jsonl\n", - "Generated JSONL file with - 3435 max words, 100 samples - at ./dataset/gen-word-3435-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", - "Generated JSONL file with - 3510 max words, 100 samples - at ./dataset/gen-word-3510-count.jsonl\n", - "Generated JSONL file 
with - 3520 max words, 100 samples - at ./dataset/gen-word-3520-count.jsonl\n", - "Generated JSONL file with - 2950 max words, 125 samples - at ./dataset/gen-word-2950-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3435 max words - at ./dataset/shuffle-word-3435-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3430 max words - at ./dataset/shuffle-word-3430-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3490 max words - at ./dataset/shuffle-word-3490-count.jsonl\n", - "Generated JSONL file with - 2485 max words, 125 samples - at ./dataset/gen-word-2485-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3415 max words - at ./dataset/shuffle-word-3415-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", - "Generated JSONL file with - 1825 max words, 150 samples - at ./dataset/gen-word-1825-count.jsonl\n", - "Generated JSONL file with - 2795 max words, 125 samples - at ./dataset/gen-word-2795-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", - "Generated JSONL file with - 1960 max words, 150 samples - at ./dataset/gen-word-1960-count.jsonl\n", - "Generated JSONL file with - 2875 max words, 125 samples - at ./dataset/gen-word-2875-count.jsonl\n", - "Generated JSONL file with - 3890 max words, 100 samples - at ./dataset/gen-word-3890-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2105 max words - at ./dataset/shuffle-word-2105-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2025 max words - at ./dataset/shuffle-word-2025-count.jsonl\n", - "Generated JSONL file with - 2915 max words, 125 samples - at ./dataset/gen-word-2915-count.jsonl\n", - "Generated JSONL file with - 2650 max words, 125 samples - at ./dataset/gen-word-2650-count.jsonl\n", - "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n", - "Generated JSONL file with - 2820 max words, 125 samples - at ./dataset/gen-word-2820-count.jsonl\n", - "Generated JSONL file with - 3085 max words, 100 samples - at ./dataset/gen-word-3085-count.jsonl\n", - "Generated JSONL file with - 3505 max words, 100 samples - at ./dataset/gen-word-3505-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4815 max words - at ./dataset/shuffle-word-4815-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4645 max words - at ./dataset/shuffle-word-4645-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4255 max words - at ./dataset/shuffle-word-4255-count.jsonl\n", + "Generated JSONL file with - 3460 max words, 100 samples - at ./dataset/gen-word-3460-count.jsonl\n", + "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n", + "Generated JSONL file with - 5745 max words, 100 samples - at ./dataset/gen-word-5745-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4690 max words - at ./dataset/shuffle-word-4690-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4925 max words - at ./dataset/shuffle-word-4925-count.jsonl\n", + "Generated JSONL file with - 3265 max words, 100 
samples - at ./dataset/gen-word-3265-count.jsonl\n", "Generated JSONL file with - 3405 max words, 100 samples - at ./dataset/gen-word-3405-count.jsonl\n", - "Generated JSONL file with - 3545 max words, 100 samples - at ./dataset/gen-word-3545-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", - "Generated JSONL file with - 3390 max words, 100 samples - at ./dataset/gen-word-3390-count.jsonl\n", - "Generated JSONL file with - 1815 max words, 150 samples - at ./dataset/gen-word-1815-count.jsonl\n", - "Generated a single JSONL file with 143 samples (100 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n", - "Generated JSONL file with - 3475 max words, 100 samples - at ./dataset/gen-word-3475-count.jsonl\n", - "Generated JSONL file with - 1885 max words, 150 samples - at ./dataset/gen-word-1885-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3530 max words - at ./dataset/shuffle-word-3530-count.jsonl\n", - "Generated a single JSONL file with 147 samples (100 token repeat) - 2600 max words - at ./dataset/shuffle-word-2600-count.jsonl\n", - "Generated JSONL file with - 2280 max words, 125 samples - at ./dataset/gen-word-2280-count.jsonl\n", - "Generated JSONL file with - 3940 max words, 100 samples - at ./dataset/gen-word-3940-count.jsonl\n", - "Generated JSONL file with - 3220 max words, 100 samples - at ./dataset/gen-word-3220-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3745 max words - at ./dataset/shuffle-word-3745-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3740 max words - at ./dataset/shuffle-word-3740-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n", + "Generated JSONL file with - 4770 max words, 100 samples - at ./dataset/gen-word-4770-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5025 max words - at ./dataset/shuffle-word-5025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4070 max words - at ./dataset/shuffle-word-4070-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4880 max words - at ./dataset/shuffle-word-4880-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5725 max words - at ./dataset/shuffle-word-5725-count.jsonl\n", + "Generated JSONL file with - 5370 max words, 100 samples - at ./dataset/gen-word-5370-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5620 max words - at ./dataset/shuffle-word-5620-count.jsonl\n", "Generated JSONL file with - 3235 max words, 100 samples - at ./dataset/gen-word-3235-count.jsonl\n", - "Generated JSONL file with - 3195 max words, 100 samples - at ./dataset/gen-word-3195-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n", - "Generated JSONL file with - 3255 max words, 100 samples - at ./dataset/gen-word-3255-count.jsonl\n", - "Generated JSONL file 
with - 3100 max words, 100 samples - at ./dataset/gen-word-3100-count.jsonl\n", - "Generated JSONL file with - 3415 max words, 100 samples - at ./dataset/gen-word-3415-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n", - "Generated JSONL file with - 2350 max words, 125 samples - at ./dataset/gen-word-2350-count.jsonl\n", - "Generated JSONL file with - 3200 max words, 100 samples - at ./dataset/gen-word-3200-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3295 max words - at ./dataset/shuffle-word-3295-count.jsonl\n", - "Generated JSONL file with - 3330 max words, 100 samples - at ./dataset/gen-word-3330-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3380 max words - at ./dataset/shuffle-word-3380-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3690 max words - at ./dataset/shuffle-word-3690-count.jsonl\n", - "Generated JSONL file with - 3530 max words, 100 samples - at ./dataset/gen-word-3530-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n", - "Generated JSONL file with - 1850 max words, 150 samples - at ./dataset/gen-word-1850-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3805 max words - at ./dataset/shuffle-word-3805-count.jsonl\n", - "Generated a single JSONL file with 188 samples (100 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n", - "Generated a single JSONL file with 187 samples (100 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2955 max words - at ./dataset/shuffle-word-2955-count.jsonl\n", - "Generated JSONL file with - 2190 max words, 125 samples - at ./dataset/gen-word-2190-count.jsonl\n", - "Generated JSONL file with - 2300 max words, 125 samples - at ./dataset/gen-word-2300-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3160 max words - at ./dataset/shuffle-word-3160-count.jsonl\n", - "Generated JSONL file with - 3930 max words, 100 samples - at ./dataset/gen-word-3930-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2715 max words - at ./dataset/shuffle-word-2715-count.jsonl\n", - "Generated JSONL file with - 2490 max words, 125 samples - at ./dataset/gen-word-2490-count.jsonl\n", - "Generated JSONL file with - 2340 max words, 125 samples - at ./dataset/gen-word-2340-count.jsonl\n", - "Generated a single JSONL file with 121 samples (100 token repeat) - 2645 max words - at ./dataset/shuffle-word-2645-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2785 max words - at ./dataset/shuffle-word-2785-count.jsonl\n", - "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3390 max words - at ./dataset/shuffle-word-3390-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3465 max words - at ./dataset/shuffle-word-3465-count.jsonl\n", - "Generated JSONL file with - 1880 max words, 150 samples - at ./dataset/gen-word-1880-count.jsonl\n", - "Generated JSONL file with - 3535 max words, 100 samples - at 
./dataset/gen-word-3535-count.jsonl\n", - "Generated a single JSONL file with 197 samples (100 token repeat) - 2365 max words - at ./dataset/shuffle-word-2365-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at ./dataset/shuffle-word-3675-count.jsonl\n", - "Generated JSONL file with - 2615 max words, 125 samples - at ./dataset/gen-word-2615-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n", - "Generated JSONL file with - 3240 max words, 100 samples - at ./dataset/gen-word-3240-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2140 max words - at ./dataset/shuffle-word-2140-count.jsonl\n", - "Generated a single JSONL file with 148 samples (100 token repeat) - 2545 max words - at ./dataset/shuffle-word-2545-count.jsonl\n", - "Generated a single JSONL file with 122 samples (100 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n", - "Generated JSONL file with - 2500 max words, 125 samples - at ./dataset/gen-word-2500-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n", - "Generated JSONL file with - 2640 max words, 125 samples - at ./dataset/gen-word-2640-count.jsonl\n", - "Generated JSONL file with - 1480 max words, 150 samples - at ./dataset/gen-word-1480-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3335 max words - at ./dataset/shuffle-word-3335-count.jsonl\n", - "Generated JSONL file with - 3030 max words, 100 samples - at ./dataset/gen-word-3030-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3510 max words - at ./dataset/shuffle-word-3510-count.jsonl\n", - "Generated JSONL file with - 2605 max words, 125 samples - at ./dataset/gen-word-2605-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1870 max words - at ./dataset/shuffle-word-1870-count.jsonl\n", - "Generated JSONL file with - 2155 max words, 125 samples - at ./dataset/gen-word-2155-count.jsonl\n", - "Generated JSONL file with - 3470 max words, 100 samples - at ./dataset/gen-word-3470-count.jsonl\n", - "Generated JSONL file with - 3790 max words, 100 samples - at ./dataset/gen-word-3790-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3765 max words - at ./dataset/shuffle-word-3765-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n", - "Generated JSONL file with - 2755 max words, 125 samples - at ./dataset/gen-word-2755-count.jsonl\n", - "Generated JSONL file with - 2895 max words, 125 samples - at ./dataset/gen-word-2895-count.jsonl\n", - "Generated JSONL file with - 2750 max words, 125 samples - at ./dataset/gen-word-2750-count.jsonl\n", - "Generated JSONL file with - 1585 max words, 150 samples - at ./dataset/gen-word-1585-count.jsonl\n", - "Generated JSONL file with - 1690 max words, 150 samples - at ./dataset/gen-word-1690-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2770 max words - at ./dataset/shuffle-word-2770-count.jsonl\n", + "Generated JSONL file with - 5360 max words, 100 samples - at 
./dataset/gen-word-5360-count.jsonl\n", + "Generated JSONL file with - 5445 max words, 100 samples - at ./dataset/gen-word-5445-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5195 max words - at ./dataset/shuffle-word-5195-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4775 max words - at ./dataset/shuffle-word-4775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5455 max words - at ./dataset/shuffle-word-5455-count.jsonl\n", + "Generated JSONL file with - 4440 max words, 100 samples - at ./dataset/gen-word-4440-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4740 max words - at ./dataset/shuffle-word-4740-count.jsonl\n", + "Generated JSONL file with - 5165 max words, 100 samples - at ./dataset/gen-word-5165-count.jsonl\n", + "Generated JSONL file with - 5650 max words, 100 samples - at ./dataset/gen-word-5650-count.jsonl\n", + "Generated JSONL file with - 5385 max words, 100 samples - at ./dataset/gen-word-5385-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5515 max words - at ./dataset/shuffle-word-5515-count.jsonl\n", + "Generated JSONL file with - 5725 max words, 100 samples - at ./dataset/gen-word-5725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5300 max words - at ./dataset/shuffle-word-5300-count.jsonl\n", + "Generated JSONL file with - 4145 max words, 100 samples - at ./dataset/gen-word-4145-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5485 max words - at ./dataset/shuffle-word-5485-count.jsonl\n", + "Generated JSONL file with - 5350 max words, 100 samples - at ./dataset/gen-word-5350-count.jsonl\n", + "Generated JSONL file with - 3320 max words, 100 samples - at ./dataset/gen-word-3320-count.jsonl\n", "Generated JSONL file with - 3285 max words, 100 samples - at ./dataset/gen-word-3285-count.jsonl\n", - "Generated JSONL file with - 1945 max words, 150 samples - at ./dataset/gen-word-1945-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3070 max words - at ./dataset/shuffle-word-3070-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3720 max words - at ./dataset/shuffle-word-3720-count.jsonl\n", - "Generated a single JSONL file with 114 samples (100 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2845 max words - at ./dataset/shuffle-word-2845-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3865 max words - at ./dataset/shuffle-word-3865-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", - "Generated JSONL file with - 3580 max words, 100 samples - at ./dataset/gen-word-3580-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3110 max words - at ./dataset/shuffle-word-3110-count.jsonl\n", - "Generated JSONL file with - 3740 max words, 100 samples - at ./dataset/gen-word-3740-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3265 max words - at ./dataset/shuffle-word-3265-count.jsonl\n", - "Generated JSONL file with - 2210 max words, 125 samples - at ./dataset/gen-word-2210-count.jsonl\n", - "Generated a single JSONL file 
with 100 samples (100 token repeat) - 2875 max words - at ./dataset/shuffle-word-2875-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2185 max words - at ./dataset/shuffle-word-2185-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3915 max words - at ./dataset/shuffle-word-3915-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3015 max words - at ./dataset/shuffle-word-3015-count.jsonl\n", - "Generated JSONL file with - 3760 max words, 100 samples - at ./dataset/gen-word-3760-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3480 max words - at ./dataset/shuffle-word-3480-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1985 max words - at ./dataset/shuffle-word-1985-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3365 max words - at ./dataset/shuffle-word-3365-count.jsonl\n", - "Generated JSONL file with - 3365 max words, 100 samples - at ./dataset/gen-word-3365-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2925 max words - at ./dataset/shuffle-word-2925-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3010 max words - at ./dataset/shuffle-word-3010-count.jsonl\n", - "Generated JSONL file with - 3485 max words, 100 samples - at ./dataset/gen-word-3485-count.jsonl\n", - "Generated JSONL file with - 2555 max words, 125 samples - at ./dataset/gen-word-2555-count.jsonl\n", - "Generated JSONL file with - 2685 max words, 125 samples - at ./dataset/gen-word-2685-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3730 max words - at ./dataset/shuffle-word-3730-count.jsonl\n", - "Generated JSONL file with - 2345 max words, 125 samples - at ./dataset/gen-word-2345-count.jsonl\n", - "Generated JSONL file with - 2540 max words, 125 samples - at ./dataset/gen-word-2540-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3640 max words - at ./dataset/shuffle-word-3640-count.jsonl\n", - "Generated JSONL file with - 2905 max words, 125 samples - at ./dataset/gen-word-2905-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", - "Generated JSONL file with - 3725 max words, 100 samples - at ./dataset/gen-word-3725-count.jsonl\n", - "Generated JSONL file with - 2480 max words, 125 samples - at ./dataset/gen-word-2480-count.jsonl\n", - "Generated a single JSONL file with 113 samples (100 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n", - "Generated JSONL file with - 2975 max words, 125 samples - at ./dataset/gen-word-2975-count.jsonl\n", - "Generated JSONL file with - 2840 max words, 125 samples - at ./dataset/gen-word-2840-count.jsonl\n", - "Generated JSONL file with - 2470 max 
words, 125 samples - at ./dataset/gen-word-2470-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n", - "Generated JSONL file with - 2035 max words, 125 samples - at ./dataset/gen-word-2035-count.jsonl\n", - "Generated JSONL file with - 3785 max words, 100 samples - at ./dataset/gen-word-3785-count.jsonl\n", - "Generated JSONL file with - 2855 max words, 125 samples - at ./dataset/gen-word-2855-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3545 max words - at ./dataset/shuffle-word-3545-count.jsonl\n", - "Generated JSONL file with - 3815 max words, 100 samples - at ./dataset/gen-word-3815-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n", - "Generated JSONL file with - 1940 max words, 150 samples - at ./dataset/gen-word-1940-count.jsonl\n", - "Generated JSONL file with - 2775 max words, 125 samples - at ./dataset/gen-word-2775-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3660 max words - at ./dataset/shuffle-word-3660-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3635 max words - at ./dataset/shuffle-word-3635-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3315 max words - at ./dataset/shuffle-word-3315-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3625 max words - at ./dataset/shuffle-word-3625-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2375 max words - at ./dataset/shuffle-word-2375-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n", - "Generated a single JSONL file with 103 samples (100 token repeat) - 2765 max words - at ./dataset/shuffle-word-2765-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3255 max words - at ./dataset/shuffle-word-3255-count.jsonl\n", - "Generated a single JSONL file with 187 samples (100 token repeat) - 2475 max words - at ./dataset/shuffle-word-2475-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3810 max words - at ./dataset/shuffle-word-3810-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n", - "Generated JSONL file with - 3455 max words, 100 samples - at ./dataset/gen-word-3455-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3780 max words - at ./dataset/shuffle-word-3780-count.jsonl\n", - "Generated a single JSONL file with 159 samples (100 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n", - "Generated JSONL file with - 3465 max words, 100 samples - at ./dataset/gen-word-3465-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3055 max words - at ./dataset/shuffle-word-3055-count.jsonl\n", - "Generated a single JSONL file with 104 samples (100 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n", - "Generated JSONL file with - 2670 max words, 125 samples - at ./dataset/gen-word-2670-count.jsonl\n", - "Generated JSONL file with - 2900 max words, 125 samples - at ./dataset/gen-word-2900-count.jsonl\n", - "Generated a 
single JSONL file with 100 samples (100 token repeat) - 3895 max words - at ./dataset/shuffle-word-3895-count.jsonl\n", - "Generated JSONL file with - 3120 max words, 100 samples - at ./dataset/gen-word-3120-count.jsonl\n", - "Generated JSONL file with - 3570 max words, 100 samples - at ./dataset/gen-word-3570-count.jsonl\n", - "Generated JSONL file with - 3065 max words, 100 samples - at ./dataset/gen-word-3065-count.jsonl\n", - "Generated JSONL file with - 2360 max words, 125 samples - at ./dataset/gen-word-2360-count.jsonl\n", - "Generated JSONL file with - 2170 max words, 125 samples - at ./dataset/gen-word-2170-count.jsonl\n", - "Generated JSONL file with - 3280 max words, 100 samples - at ./dataset/gen-word-3280-count.jsonl\n", - "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", - "Generated JSONL file with - 3015 max words, 100 samples - at ./dataset/gen-word-3015-count.jsonl\n", - "Generated JSONL file with - 2940 max words, 125 samples - at ./dataset/gen-word-2940-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3020 max words - at ./dataset/shuffle-word-3020-count.jsonl\n", - "Generated JSONL file with - 2185 max words, 125 samples - at ./dataset/gen-word-2185-count.jsonl\n", - "Generated JSONL file with - 3865 max words, 100 samples - at ./dataset/gen-word-3865-count.jsonl\n", - "Generated JSONL file with - 3345 max words, 100 samples - at ./dataset/gen-word-3345-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3170 max words - at ./dataset/shuffle-word-3170-count.jsonl\n", - "Generated a single JSONL file with 144 samples (100 token repeat) - 2555 max words - at ./dataset/shuffle-word-2555-count.jsonl\n", - "Generated JSONL file with - 2515 max words, 125 samples - at ./dataset/gen-word-2515-count.jsonl\n", - "Generated JSONL file with - 2815 max words, 125 samples - at ./dataset/gen-word-2815-count.jsonl\n", - "Generated JSONL file with - 2930 max words, 125 samples - at ./dataset/gen-word-2930-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3555 max words - at ./dataset/shuffle-word-3555-count.jsonl\n", - "Generated JSONL file with - 3730 max words, 100 samples - at ./dataset/gen-word-3730-count.jsonl\n", - "Generated JSONL file with - 3440 max words, 100 samples - at ./dataset/gen-word-3440-count.jsonl\n", - "Generated JSONL file with - 3075 max words, 100 samples - at ./dataset/gen-word-3075-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3835 max words - at ./dataset/shuffle-word-3835-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3035 max words - at ./dataset/shuffle-word-3035-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2855 max words - at ./dataset/shuffle-word-2855-count.jsonl\n", - "Generated JSONL file with - 2960 max words, 125 samples - at ./dataset/gen-word-2960-count.jsonl\n", - "Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", - "Generated JSONL file with - 2785 max words, 125 samples - at ./dataset/gen-word-2785-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3245 max words - at ./dataset/shuffle-word-3245-count.jsonl\n", - "Generated JSONL file with - 2600 
max words, 125 samples - at ./dataset/gen-word-2600-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", - "Generated JSONL file with - 2010 max words, 125 samples - at ./dataset/gen-word-2010-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3580 max words - at ./dataset/shuffle-word-3580-count.jsonl\n", - "Generated a single JSONL file with 183 samples (100 token repeat) - 2405 max words - at ./dataset/shuffle-word-2405-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3820 max words - at ./dataset/shuffle-word-3820-count.jsonl\n", - "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", - "Generated a single JSONL file with 186 samples (100 token repeat) - 2485 max words - at ./dataset/shuffle-word-2485-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", - "Generated JSONL file with - 2880 max words, 125 samples - at ./dataset/gen-word-2880-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n", - "Generated JSONL file with - 2715 max words, 125 samples - at ./dataset/gen-word-2715-count.jsonl\n", - "Generated JSONL file with - 3855 max words, 100 samples - at ./dataset/gen-word-3855-count.jsonl\n", - "Generated JSONL file with - 3660 max words, 100 samples - at ./dataset/gen-word-3660-count.jsonl\n", - "Generated JSONL file with - 2110 max words, 125 samples - at ./dataset/gen-word-2110-count.jsonl\n", - "Generated JSONL file with - 3035 max words, 100 samples - at ./dataset/gen-word-3035-count.jsonl\n", - "Generated JSONL file with - 2845 max words, 125 samples - at ./dataset/gen-word-2845-count.jsonl\n", - "Generated JSONL file with - 3395 max words, 100 samples - at ./dataset/gen-word-3395-count.jsonl\n", - "Generated JSONL file with - 3045 max words, 100 samples - at ./dataset/gen-word-3045-count.jsonl\n", - "Generated JSONL file with - 2545 max words, 125 samples - at ./dataset/gen-word-2545-count.jsonl\n", - "Generated JSONL file with - 2585 max words, 125 samples - at ./dataset/gen-word-2585-count.jsonl\n", - "Generated JSONL file with - 3145 max words, 100 samples - at ./dataset/gen-word-3145-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n", - "Generated JSONL file with - 3355 max words, 100 samples - at ./dataset/gen-word-3355-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3830 max words - at ./dataset/shuffle-word-3830-count.jsonl\n", - "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n", + "Generated JSONL file with - 4855 max words, 100 samples - at ./dataset/gen-word-4855-count.jsonl\n", + "Generated JSONL file with - 3335 max words, 100 samples - at ./dataset/gen-word-3335-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4480 max words - at ./dataset/shuffle-word-4480-count.jsonl\n", + "Generated JSONL file with - 5415 max words, 100 samples - at ./dataset/gen-word-5415-count.jsonl\n", + "Generated JSONL file 
with - 5150 max words, 100 samples - at ./dataset/gen-word-5150-count.jsonl\n", + "Generated JSONL file with - 5485 max words, 100 samples - at ./dataset/gen-word-5485-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3930 max words - at ./dataset/shuffle-word-3930-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4915 max words - at ./dataset/shuffle-word-4915-count.jsonl\n", + "Generated JSONL file with - 5560 max words, 100 samples - at ./dataset/gen-word-5560-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5815 max words - at ./dataset/shuffle-word-5815-count.jsonl\n", + "Generated JSONL file with - 3295 max words, 100 samples - at ./dataset/gen-word-3295-count.jsonl\n", + "Generated JSONL file with - 5925 max words, 100 samples - at ./dataset/gen-word-5925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5150 max words - at ./dataset/shuffle-word-5150-count.jsonl\n", + "Generated JSONL file with - 4090 max words, 100 samples - at ./dataset/gen-word-4090-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4890 max words - at ./dataset/shuffle-word-4890-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4535 max words - at ./dataset/shuffle-word-4535-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5420 max words - at ./dataset/shuffle-word-5420-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4145 max words - at ./dataset/shuffle-word-4145-count.jsonl\n", + "Generated JSONL file with - 3380 max words, 100 samples - at ./dataset/gen-word-3380-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4115 max words - at ./dataset/shuffle-word-4115-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4420 max words - at ./dataset/shuffle-word-4420-count.jsonl\n", + "Generated JSONL file with - 5455 max words, 100 samples - at ./dataset/gen-word-5455-count.jsonl\n", + "Generated JSONL file with - 4785 max words, 100 samples - at ./dataset/gen-word-4785-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5600 max words - at ./dataset/shuffle-word-5600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5145 max words - at ./dataset/shuffle-word-5145-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5335 max words - at ./dataset/shuffle-word-5335-count.jsonl\n", + "Generated JSONL file with - 5430 max words, 100 samples - at ./dataset/gen-word-5430-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4685 max words - at ./dataset/shuffle-word-4685-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5460 max words - at ./dataset/shuffle-word-5460-count.jsonl\n", + "Generated JSONL file with - 4065 max words, 100 samples - at ./dataset/gen-word-4065-count.jsonl\n", + "Generated JSONL file with - 5280 max words, 100 samples - at ./dataset/gen-word-5280-count.jsonl\n", + "Generated JSONL file with - 3340 max words, 100 samples - at ./dataset/gen-word-3340-count.jsonl\n", + "Generated JSONL file with - 4220 max words, 100 samples - at ./dataset/gen-word-4220-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5745 max words - at ./dataset/shuffle-word-5745-count.jsonl\n", + "Generated JSONL file with - 4820 max words, 100 samples - at ./dataset/gen-word-4820-count.jsonl\n", + "Generated JSONL file with - 4060 max words, 100 samples - at ./dataset/gen-word-4060-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4830 max words - at ./dataset/shuffle-word-4830-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4805 max words - at ./dataset/shuffle-word-4805-count.jsonl\n", + "Generated JSONL file with - 5695 max words, 100 samples - at ./dataset/gen-word-5695-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5630 max words - at ./dataset/shuffle-word-5630-count.jsonl\n", + "Generated JSONL file with - 4135 max words, 100 samples - at ./dataset/gen-word-4135-count.jsonl\n", + "Generated JSONL file with - 5730 max words, 100 samples - at ./dataset/gen-word-5730-count.jsonl\n", + "Generated JSONL file with - 4745 max words, 100 samples - at ./dataset/gen-word-4745-count.jsonl\n", + "Generated JSONL file with - 4915 max words, 100 samples - at ./dataset/gen-word-4915-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4760 max words - at ./dataset/shuffle-word-4760-count.jsonl\n", + "Generated JSONL file with - 5490 max words, 100 samples - at ./dataset/gen-word-5490-count.jsonl\n", + "Generated JSONL file with - 5585 max words, 100 samples - at ./dataset/gen-word-5585-count.jsonl\n", + "Generated JSONL file with - 4320 max words, 100 samples - at ./dataset/gen-word-4320-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4935 max words - at ./dataset/shuffle-word-4935-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5165 max words - at ./dataset/shuffle-word-5165-count.jsonl\n", + "Generated JSONL file with - 4055 max words, 100 samples - at ./dataset/gen-word-4055-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4980 max words - at ./dataset/shuffle-word-4980-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5220 max words - at ./dataset/shuffle-word-5220-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4525 max words - at ./dataset/shuffle-word-4525-count.jsonl\n", + "Generated JSONL file with - 4985 max words, 100 samples - at ./dataset/gen-word-4985-count.jsonl\n", + "Generated JSONL file with - 4740 max words, 100 samples - at ./dataset/gen-word-4740-count.jsonl\n", + "Generated JSONL file with - 4165 max words, 100 samples - at ./dataset/gen-word-4165-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5295 max words - at ./dataset/shuffle-word-5295-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5135 max words - at ./dataset/shuffle-word-5135-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5510 max words - at ./dataset/shuffle-word-5510-count.jsonl\n", + "Generated JSONL file with - 5635 max words, 100 samples - at ./dataset/gen-word-5635-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5635 max words - at ./dataset/shuffle-word-5635-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5855 max words - at ./dataset/shuffle-word-5855-count.jsonl\n", + "Generated JSONL file with - 3895 max words, 100 samples - at ./dataset/gen-word-3895-count.jsonl\n", + "Generated JSONL file with - 3975 max words, 100 samples - at ./dataset/gen-word-3975-count.jsonl\n", + "Generated JSONL file with - 4180 max words, 100 samples - at ./dataset/gen-word-4180-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5190 max words - at ./dataset/shuffle-word-5190-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5715 max words - at ./dataset/shuffle-word-5715-count.jsonl\n", + "Generated JSONL file with - 3910 max words, 100 samples - at ./dataset/gen-word-3910-count.jsonl\n", + "Generated JSONL file with - 5805 max words, 100 samples - at ./dataset/gen-word-5805-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5680 max words - at ./dataset/shuffle-word-5680-count.jsonl\n", + "Generated JSONL file with - 4185 max words, 100 samples - at ./dataset/gen-word-4185-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5170 max words - at ./dataset/shuffle-word-5170-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4960 max words - at ./dataset/shuffle-word-4960-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5415 max words - at ./dataset/shuffle-word-5415-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5545 max words - at ./dataset/shuffle-word-5545-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4365 max words - at ./dataset/shuffle-word-4365-count.jsonl\n", + "Generated JSONL file with - 4115 max words, 100 samples - at ./dataset/gen-word-4115-count.jsonl\n", + "Generated JSONL file with - 4550 max words, 100 samples - at ./dataset/gen-word-4550-count.jsonl\n", + "Generated JSONL file with - 3980 max words, 100 samples - at ./dataset/gen-word-3980-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5645 max words - at ./dataset/shuffle-word-5645-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4315 max
words - at ./dataset/shuffle-word-4315-count.jsonl\n", + "Generated JSONL file with - 4340 max words, 100 samples - at ./dataset/gen-word-4340-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5080 max words - at ./dataset/shuffle-word-5080-count.jsonl\n", + "Generated JSONL file with - 4345 max words, 100 samples - at ./dataset/gen-word-4345-count.jsonl\n", + "Generated JSONL file with - 5050 max words, 100 samples - at ./dataset/gen-word-5050-count.jsonl\n", + "Generated JSONL file with - 4050 max words, 100 samples - at ./dataset/gen-word-4050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4955 max words - at ./dataset/shuffle-word-4955-count.jsonl\n", + "Generated JSONL file with - 4850 max words, 100 samples - at ./dataset/gen-word-4850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4985 max words - at ./dataset/shuffle-word-4985-count.jsonl\n", + "Generated JSONL file with - 5145 max words, 100 samples - at ./dataset/gen-word-5145-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4845 max words - at ./dataset/shuffle-word-4845-count.jsonl\n", + "Generated JSONL file with - 5760 max words, 100 samples - at ./dataset/gen-word-5760-count.jsonl\n", + "Generated JSONL file with - 4290 max words, 100 samples - at ./dataset/gen-word-4290-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4800 max words - at ./dataset/shuffle-word-4800-count.jsonl\n", + "Generated JSONL file with - 5375 max words, 100 samples - at ./dataset/gen-word-5375-count.jsonl\n", + "Generated JSONL file with - 4965 max words, 100 samples - at ./dataset/gen-word-4965-count.jsonl\n", + "Generated JSONL file with - 5025 max words, 100 samples - at ./dataset/gen-word-5025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5320 max words - at ./dataset/shuffle-word-5320-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5370 max words - at ./dataset/shuffle-word-5370-count.jsonl\n", + "Generated JSONL file with - 4620 max words, 100 samples - at ./dataset/gen-word-4620-count.jsonl\n", + "Generated JSONL file with - 5530 max words, 100 samples - at ./dataset/gen-word-5530-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5530 max words - at ./dataset/shuffle-word-5530-count.jsonl\n", + "Generated JSONL file with - 5810 max words, 100 samples - at ./dataset/gen-word-5810-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5960 max words - at ./dataset/shuffle-word-5960-count.jsonl\n", + "Generated JSONL file with - 5670 max words, 100 samples - at ./dataset/gen-word-5670-count.jsonl\n", + "Generated JSONL file with - 4380 max words, 100 samples - at ./dataset/gen-word-4380-count.jsonl\n", + "Generated JSONL file with - 5535 max words, 100 samples - at ./dataset/gen-word-5535-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5990 max words - at ./dataset/shuffle-word-5990-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5450 max words - at ./dataset/shuffle-word-5450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4335 max words - at ./dataset/shuffle-word-4335-count.jsonl\n", + "Generated JSONL file with - 5755 max words, 100 samples - at ./dataset/gen-word-5755-count.jsonl\n", + 
"Generated a single JSONL file with 100 samples (100 token repeat) - 5575 max words - at ./dataset/shuffle-word-5575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5650 max words - at ./dataset/shuffle-word-5650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4495 max words - at ./dataset/shuffle-word-4495-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5830 max words - at ./dataset/shuffle-word-5830-count.jsonl\n", + "Generated JSONL file with - 4305 max words, 100 samples - at ./dataset/gen-word-4305-count.jsonl\n", + "Generated JSONL file with - 4160 max words, 100 samples - at ./dataset/gen-word-4160-count.jsonl\n", + "Generated JSONL file with - 5515 max words, 100 samples - at ./dataset/gen-word-5515-count.jsonl\n", + "Generated JSONL file with - 4175 max words, 100 samples - at ./dataset/gen-word-4175-count.jsonl\n", + "Generated JSONL file with - 3930 max words, 100 samples - at ./dataset/gen-word-3930-count.jsonl\n", + "Generated JSONL file with - 4310 max words, 100 samples - at ./dataset/gen-word-4310-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5730 max words - at ./dataset/shuffle-word-5730-count.jsonl\n", + "Generated a single JSONL file with 24028 samples (500 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5440 max words - at ./dataset/shuffle-word-5440-count.jsonl\n", + "Generated JSONL file with - 5175 max words, 100 samples - at ./dataset/gen-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4265 max words - at ./dataset/shuffle-word-4265-count.jsonl\n", + "Generated JSONL file with - 4170 max words, 100 samples - at ./dataset/gen-word-4170-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5810 max words - at ./dataset/shuffle-word-5810-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4280 max words - at ./dataset/shuffle-word-4280-count.jsonl\n", + "Generated JSONL file with - 4890 max words, 100 samples - at ./dataset/gen-word-4890-count.jsonl\n", + "Generated JSONL file with - 4980 max words, 100 samples - at ./dataset/gen-word-4980-count.jsonl\n", + "Generated JSONL file with - 4705 max words, 100 samples - at ./dataset/gen-word-4705-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5270 max words - at ./dataset/shuffle-word-5270-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5280 max words - at ./dataset/shuffle-word-5280-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5875 max words - at ./dataset/shuffle-word-5875-count.jsonl\n", + "Generated JSONL file with - 5290 max words, 100 samples - at ./dataset/gen-word-5290-count.jsonl\n", + "Generated JSONL file with - 4760 max words, 100 samples - at ./dataset/gen-word-4760-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4340 max words - at ./dataset/shuffle-word-4340-count.jsonl\n", + "Generated JSONL file with - 5420 max words, 100 samples - at ./dataset/gen-word-5420-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4755 max words - at ./dataset/shuffle-word-4755-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4405 
max words - at ./dataset/shuffle-word-4405-count.jsonl\n", + "Generated JSONL file with - 5465 max words, 100 samples - at ./dataset/gen-word-5465-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4220 max words - at ./dataset/shuffle-word-4220-count.jsonl\n", + "Generated JSONL file with - 5920 max words, 100 samples - at ./dataset/gen-word-5920-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4550 max words - at ./dataset/shuffle-word-4550-count.jsonl\n", + "Generated JSONL file with - 4895 max words, 100 samples - at ./dataset/gen-word-4895-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5590 max words - at ./dataset/shuffle-word-5590-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4275 max words - at ./dataset/shuffle-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5850 max words - at ./dataset/shuffle-word-5850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4515 max words - at ./dataset/shuffle-word-4515-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4530 max words - at ./dataset/shuffle-word-4530-count.jsonl\n", + "Generated JSONL file with - 5205 max words, 100 samples - at ./dataset/gen-word-5205-count.jsonl\n", + "Generated JSONL file with - 5575 max words, 100 samples - at ./dataset/gen-word-5575-count.jsonl\n", + "Generated JSONL file with - 5250 max words, 100 samples - at ./dataset/gen-word-5250-count.jsonl\n", + "Generated JSONL file with - 4150 max words, 100 samples - at ./dataset/gen-word-4150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5070 max words - at ./dataset/shuffle-word-5070-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4205 max words - at ./dataset/shuffle-word-4205-count.jsonl\n", + "Generated JSONL file with - 5000 max words, 100 samples - at ./dataset/gen-word-5000-count.jsonl\n", + "Generated JSONL file with - 4790 max words, 100 samples - at ./dataset/gen-word-4790-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5865 max words - at ./dataset/shuffle-word-5865-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4270 max words - at ./dataset/shuffle-word-4270-count.jsonl\n", + "Generated JSONL file with - 5790 max words, 100 samples - at ./dataset/gen-word-5790-count.jsonl\n", + "Generated JSONL file with - 5665 max words, 100 samples - at ./dataset/gen-word-5665-count.jsonl\n", + "Generated JSONL file with - 4555 max words, 100 samples - at ./dataset/gen-word-4555-count.jsonl\n", + "Generated JSONL file with - 5840 max words, 100 samples - at ./dataset/gen-word-5840-count.jsonl\n", + "Generated JSONL file with - 4680 max words, 100 samples - at ./dataset/gen-word-4680-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4325 max words - at ./dataset/shuffle-word-4325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4260 max words - at ./dataset/shuffle-word-4260-count.jsonl\n", + "Generated JSONL file with - 5390 max words, 100 samples - at ./dataset/gen-word-5390-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5720 max words - at ./dataset/shuffle-word-5720-count.jsonl\n", + "Generated a single JSONL file with 100 
samples (100 token repeat) - 5940 max words - at ./dataset/shuffle-word-5940-count.jsonl\n", + "Generated JSONL file with - 5275 max words, 100 samples - at ./dataset/gen-word-5275-count.jsonl\n", + "Generated JSONL file with - 5645 max words, 100 samples - at ./dataset/gen-word-5645-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4990 max words - at ./dataset/shuffle-word-4990-count.jsonl\n", + "Generated JSONL file with - 4265 max words, 100 samples - at ./dataset/gen-word-4265-count.jsonl\n", + "Generated JSONL file with - 5215 max words, 100 samples - at ./dataset/gen-word-5215-count.jsonl\n", + "Generated JSONL file with - 5095 max words, 100 samples - at ./dataset/gen-word-5095-count.jsonl\n", + "Generated JSONL file with - 5930 max words, 100 samples - at ./dataset/gen-word-5930-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4600 max words - at ./dataset/shuffle-word-4600-count.jsonl\n", + "Generated JSONL file with - 5765 max words, 100 samples - at ./dataset/gen-word-5765-count.jsonl\n", + "Generated JSONL file with - 4815 max words, 100 samples - at ./dataset/gen-word-4815-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5925 max words - at ./dataset/shuffle-word-5925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4320 max words - at ./dataset/shuffle-word-4320-count.jsonl\n", + "Generated JSONL file with - 5715 max words, 100 samples - at ./dataset/gen-word-5715-count.jsonl\n", + "Generated JSONL file with - 5355 max words, 100 samples - at ./dataset/gen-word-5355-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4215 max words - at ./dataset/shuffle-word-4215-count.jsonl\n", + "Generated JSONL file with - 5700 max words, 100 samples - at ./dataset/gen-word-5700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5595 max words - at ./dataset/shuffle-word-5595-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5435 max words - at ./dataset/shuffle-word-5435-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4970 max words - at ./dataset/shuffle-word-4970-count.jsonl\n", + "Generated JSONL file with - 5835 max words, 100 samples - at ./dataset/gen-word-5835-count.jsonl\n", + "Generated JSONL file with - 4690 max words, 100 samples - at ./dataset/gen-word-4690-count.jsonl\n", + "Generated JSONL file with - 4635 max words, 100 samples - at ./dataset/gen-word-4635-count.jsonl\n", + "Generated JSONL file with - 4655 max words, 100 samples - at ./dataset/gen-word-4655-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4505 max words - at ./dataset/shuffle-word-4505-count.jsonl\n", + "Generated JSONL file with - 5495 max words, 100 samples - at ./dataset/gen-word-5495-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4640 max words - at ./dataset/shuffle-word-4640-count.jsonl\n", + "Generated JSONL file with - 5070 max words, 100 samples - at ./dataset/gen-word-5070-count.jsonl\n", + "Generated JSONL file with - 5610 max words, 100 samples - at ./dataset/gen-word-5610-count.jsonl\n", + "Generated JSONL file with - 5210 max words, 100 samples - at ./dataset/gen-word-5210-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4940 max words - at ./dataset/shuffle-word-4940-count.jsonl\n", 
+ "Generated JSONL file with - 4465 max words, 100 samples - at ./dataset/gen-word-4465-count.jsonl\n", + "Generated JSONL file with - 5160 max words, 100 samples - at ./dataset/gen-word-5160-count.jsonl\n", + "Generated JSONL file with - 5170 max words, 100 samples - at ./dataset/gen-word-5170-count.jsonl\n", + "Generated JSONL file with - 5820 max words, 100 samples - at ./dataset/gen-word-5820-count.jsonl\n", + "Generated JSONL file with - 4865 max words, 100 samples - at ./dataset/gen-word-4865-count.jsonl\n", + "Generated JSONL file with - 5875 max words, 100 samples - at ./dataset/gen-word-5875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4620 max words - at ./dataset/shuffle-word-4620-count.jsonl\n", + "Generated JSONL file with - 5245 max words, 100 samples - at ./dataset/gen-word-5245-count.jsonl\n", + "Generated JSONL file with - 5310 max words, 100 samples - at ./dataset/gen-word-5310-count.jsonl\n", + "Generated JSONL file with - 5865 max words, 100 samples - at ./dataset/gen-word-5865-count.jsonl\n", + "Generated JSONL file with - 5510 max words, 100 samples - at ./dataset/gen-word-5510-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4625 max words - at ./dataset/shuffle-word-4625-count.jsonl\n", + "Generated JSONL file with - 5990 max words, 100 samples - at ./dataset/gen-word-5990-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4425 max words - at ./dataset/shuffle-word-4425-count.jsonl\n", + "Generated JSONL file with - 4425 max words, 100 samples - at ./dataset/gen-word-4425-count.jsonl\n", + "Generated JSONL file with - 5795 max words, 100 samples - at ./dataset/gen-word-5795-count.jsonl\n", + "Generated JSONL file with - 5905 max words, 100 samples - at ./dataset/gen-word-5905-count.jsonl\n", + "Generated JSONL file with - 5720 max words, 100 samples - at ./dataset/gen-word-5720-count.jsonl\n", + "Generated JSONL file with - 4255 max words, 100 samples - at ./dataset/gen-word-4255-count.jsonl\n", + "Generated JSONL file with - 5620 max words, 100 samples - at ./dataset/gen-word-5620-count.jsonl\n", + "Generated JSONL file with - 5995 max words, 100 samples - at ./dataset/gen-word-5995-count.jsonl\n", + "Generated JSONL file with - 4830 max words, 100 samples - at ./dataset/gen-word-4830-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4450 max words - at ./dataset/shuffle-word-4450-count.jsonl\n", + "Generated JSONL file with - 5975 max words, 100 samples - at ./dataset/gen-word-5975-count.jsonl\n", + "Generated JSONL file with - 4730 max words, 100 samples - at ./dataset/gen-word-4730-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5655 max words - at ./dataset/shuffle-word-5655-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5615 max words - at ./dataset/shuffle-word-5615-count.jsonl\n", + "Generated JSONL file with - 5750 max words, 100 samples - at ./dataset/gen-word-5750-count.jsonl\n", + "Generated JSONL file with - 5460 max words, 100 samples - at ./dataset/gen-word-5460-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4995 max words - at ./dataset/shuffle-word-4995-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4555 max words - at ./dataset/shuffle-word-4555-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4900 max 
words - at ./dataset/shuffle-word-4900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4825 max words - at ./dataset/shuffle-word-4825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4905 max words - at ./dataset/shuffle-word-4905-count.jsonl\n", + "Generated JSONL file with - 5605 max words, 100 samples - at ./dataset/gen-word-5605-count.jsonl\n", + "Generated JSONL file with - 5440 max words, 100 samples - at ./dataset/gen-word-5440-count.jsonl\n", + "Generated JSONL file with - 5325 max words, 100 samples - at ./dataset/gen-word-5325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5580 max words - at ./dataset/shuffle-word-5580-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5640 max words - at ./dataset/shuffle-word-5640-count.jsonl\n", + "Generated JSONL file with - 5295 max words, 100 samples - at ./dataset/gen-word-5295-count.jsonl\n", + "Generated JSONL file with - 4845 max words, 100 samples - at ./dataset/gen-word-4845-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5995 max words - at ./dataset/shuffle-word-5995-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5885 max words - at ./dataset/shuffle-word-5885-count.jsonl\n", + "Generated JSONL file with - 5735 max words, 100 samples - at ./dataset/gen-word-5735-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5915 max words - at ./dataset/shuffle-word-5915-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5740 max words - at ./dataset/shuffle-word-5740-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4895 max words - at ./dataset/shuffle-word-4895-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4660 max words - at ./dataset/shuffle-word-4660-count.jsonl\n", + "Generated JSONL file with - 5405 max words, 100 samples - at ./dataset/gen-word-5405-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 6000 max words - at ./dataset/shuffle-word-6000-count.jsonl\n", + "Generated JSONL file with - 5045 max words, 100 samples - at ./dataset/gen-word-5045-count.jsonl\n", + "Generated JSONL file with - 5520 max words, 100 samples - at ./dataset/gen-word-5520-count.jsonl\n", + "Generated JSONL file with - 4885 max words, 100 samples - at ./dataset/gen-word-4885-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5525 max words - at ./dataset/shuffle-word-5525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5955 max words - at ./dataset/shuffle-word-5955-count.jsonl\n", + "Generated JSONL file with - 5365 max words, 100 samples - at ./dataset/gen-word-5365-count.jsonl\n", + "Generated JSONL file with - 4955 max words, 100 samples - at ./dataset/gen-word-4955-count.jsonl\n", + "Generated JSONL file with - 5065 max words, 100 samples - at ./dataset/gen-word-5065-count.jsonl\n", + "Generated JSONL file with - 5910 max words, 100 samples - at ./dataset/gen-word-5910-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5755 max words - at ./dataset/shuffle-word-5755-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5820 max words - at ./dataset/shuffle-word-5820-count.jsonl\n", + "Generated JSONL 
file with - 5340 max words, 100 samples - at ./dataset/gen-word-5340-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5625 max words - at ./dataset/shuffle-word-5625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5585 max words - at ./dataset/shuffle-word-5585-count.jsonl\n", + "Generated JSONL file with - 6000 max words, 100 samples - at ./dataset/gen-word-6000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5685 max words - at ./dataset/shuffle-word-5685-count.jsonl\n", + "Generated JSONL file with - 4735 max words, 100 samples - at ./dataset/gen-word-4735-count.jsonl\n", + "Generated JSONL file with - 4575 max words, 100 samples - at ./dataset/gen-word-4575-count.jsonl\n", + "Generated JSONL file with - 4480 max words, 100 samples - at ./dataset/gen-word-4480-count.jsonl\n", + "Generated JSONL file with - 4495 max words, 100 samples - at ./dataset/gen-word-4495-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5900 max words - at ./dataset/shuffle-word-5900-count.jsonl\n", + "Generated JSONL file with - 4970 max words, 100 samples - at ./dataset/gen-word-4970-count.jsonl\n", + "Generated JSONL file with - 5220 max words, 100 samples - at ./dataset/gen-word-5220-count.jsonl\n", + "Generated JSONL file with - 4595 max words, 100 samples - at ./dataset/gen-word-4595-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5840 max words - at ./dataset/shuffle-word-5840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5920 max words - at ./dataset/shuffle-word-5920-count.jsonl\n", + "Generated JSONL file with - 5265 max words, 100 samples - at ./dataset/gen-word-5265-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5260 max words - at ./dataset/shuffle-word-5260-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5800 max words - at ./dataset/shuffle-word-5800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5700 max words - at ./dataset/shuffle-word-5700-count.jsonl\n", + "Generated JSONL file with - 4725 max words, 100 samples - at ./dataset/gen-word-4725-count.jsonl\n", + "Generated JSONL file with - 5915 max words, 100 samples - at ./dataset/gen-word-5915-count.jsonl\n", + "Generated JSONL file with - 5030 max words, 100 samples - at ./dataset/gen-word-5030-count.jsonl\n", + "Generated JSONL file with - 4530 max words, 100 samples - at ./dataset/gen-word-4530-count.jsonl\n", + "Generated JSONL file with - 4825 max words, 100 samples - at ./dataset/gen-word-4825-count.jsonl\n", + "Generated JSONL file with - 5830 max words, 100 samples - at ./dataset/gen-word-5830-count.jsonl\n", + "Generated JSONL file with - 5475 max words, 100 samples - at ./dataset/gen-word-5475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5780 max words - at ./dataset/shuffle-word-5780-count.jsonl\n", + "Generated JSONL file with - 4775 max words, 100 samples - at ./dataset/gen-word-4775-count.jsonl\n", + "Generated JSONL file with - 5945 max words, 100 samples - at ./dataset/gen-word-5945-count.jsonl\n", + "Generated JSONL file with - 5500 max words, 100 samples - at ./dataset/gen-word-5500-count.jsonl\n", + "Generated JSONL file with - 5595 max words, 100 samples - at ./dataset/gen-word-5595-count.jsonl\n", + "Generated JSONL file with - 5630 max 
words, 100 samples - at ./dataset/gen-word-5630-count.jsonl\n", + "Generated JSONL file with - 4430 max words, 100 samples - at ./dataset/gen-word-4430-count.jsonl\n", + "Generated JSONL file with - 5710 max words, 100 samples - at ./dataset/gen-word-5710-count.jsonl\n", + "Generated JSONL file with - 5135 max words, 100 samples - at ./dataset/gen-word-5135-count.jsonl\n", + "Generated JSONL file with - 5235 max words, 100 samples - at ./dataset/gen-word-5235-count.jsonl\n", + "Generated JSONL file with - 4460 max words, 100 samples - at ./dataset/gen-word-4460-count.jsonl\n", + "Generated JSONL file with - 4805 max words, 100 samples - at ./dataset/gen-word-4805-count.jsonl\n", + "Generated JSONL file with - 4870 max words, 100 samples - at ./dataset/gen-word-4870-count.jsonl\n", + "Generated JSONL file with - 4450 max words, 100 samples - at ./dataset/gen-word-4450-count.jsonl\n", + "Generated JSONL file with - 4795 max words, 100 samples - at ./dataset/gen-word-4795-count.jsonl\n", + "Generated JSONL file with - 4470 max words, 100 samples - at ./dataset/gen-word-4470-count.jsonl\n", + "Generated JSONL file with - 4360 max words, 100 samples - at ./dataset/gen-word-4360-count.jsonl\n", + "Generated JSONL file with - 5785 max words, 100 samples - at ./dataset/gen-word-5785-count.jsonl\n", + "Generated JSONL file with - 5410 max words, 100 samples - at ./dataset/gen-word-5410-count.jsonl\n", + "Generated JSONL file with - 4435 max words, 100 samples - at ./dataset/gen-word-4435-count.jsonl\n", + "Generated JSONL file with - 5660 max words, 100 samples - at ./dataset/gen-word-5660-count.jsonl\n", + "Generated JSONL file with - 5655 max words, 100 samples - at ./dataset/gen-word-5655-count.jsonl\n", + "Generated JSONL file with - 4720 max words, 100 samples - at ./dataset/gen-word-4720-count.jsonl\n", + "Generated JSONL file with - 4540 max words, 100 samples - at ./dataset/gen-word-4540-count.jsonl\n", + "Generated JSONL file with - 5740 max words, 100 samples - at ./dataset/gen-word-5740-count.jsonl\n", + "Generated JSONL file with - 4410 max words, 100 samples - at ./dataset/gen-word-4410-count.jsonl\n", + "Generated JSONL file with - 4925 max words, 100 samples - at ./dataset/gen-word-4925-count.jsonl\n", + "Generated JSONL file with - 4810 max words, 100 samples - at ./dataset/gen-word-4810-count.jsonl\n", + "Generated JSONL file with - 4750 max words, 100 samples - at ./dataset/gen-word-4750-count.jsonl\n", + "Generated JSONL file with - 5075 max words, 100 samples - at ./dataset/gen-word-5075-count.jsonl\n", + "Generated JSONL file with - 5890 max words, 100 samples - at ./dataset/gen-word-5890-count.jsonl\n", + "Generated JSONL file with - 4520 max words, 100 samples - at ./dataset/gen-word-4520-count.jsonl\n", + "Generated JSONL file with - 5505 max words, 100 samples - at ./dataset/gen-word-5505-count.jsonl\n", + "Generated JSONL file with - 5685 max words, 100 samples - at ./dataset/gen-word-5685-count.jsonl\n", + "Generated JSONL file with - 5935 max words, 100 samples - at ./dataset/gen-word-5935-count.jsonl\n", + "Generated JSONL file with - 4880 max words, 100 samples - at ./dataset/gen-word-4880-count.jsonl\n", + "Generated JSONL file with - 5105 max words, 100 samples - at ./dataset/gen-word-5105-count.jsonl\n", + "Generated JSONL file with - 5480 max words, 100 samples - at ./dataset/gen-word-5480-count.jsonl\n", + "Generated JSONL file with - 5985 max words, 100 samples - at ./dataset/gen-word-5985-count.jsonl\n", + "Generated JSONL file with - 5825 max words, 100 samples 
- at ./dataset/gen-word-5825-count.jsonl\n", + "Generated JSONL file with - 5545 max words, 100 samples - at ./dataset/gen-word-5545-count.jsonl\n", + "Generated JSONL file with - 5870 max words, 100 samples - at ./dataset/gen-word-5870-count.jsonl\n", + "Generated a single JSONL file with 279154 samples (500 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", "## Done ##\n", - "total 3.8G\n", - "drwxrwxr-x 2 recursal recursal 84K Jan 22 08:53 .\n", - "drwxrwxr-x 5 recursal recursal 4.0K Jan 22 08:52 ..\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 08:53 gen-word-1000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 08:53 gen-word-1005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 163K Jan 22 08:53 gen-word-100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 08:53 gen-word-1010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 08:53 gen-word-1015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 08:53 gen-word-1020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 08:53 gen-word-1025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 08:53 gen-word-1030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 08:53 gen-word-1035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 08:53 gen-word-1040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 167K Jan 22 08:53 gen-word-105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 30K Jan 22 08:53 gen-word-10-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 08:53 gen-word-1100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 175K Jan 22 08:53 gen-word-110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 185K Jan 22 08:53 
gen-word-115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 08:53 gen-word-1180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 188K Jan 22 08:53 gen-word-120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 08:53 gen-word-1255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 199K Jan 22 08:53 gen-word-125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 200K Jan 22 08:53 gen-word-130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 08:53 gen-word-1320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1355-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 211K Jan 22 08:53 gen-word-135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 08:53 gen-word-1405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 219K Jan 22 08:53 gen-word-140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 222K Jan 22 08:53 gen-word-145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 08:53 gen-word-1465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 231K Jan 22 08:53 gen-word-150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 08:53 gen-word-1530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
2.3M Jan 22 08:53 gen-word-1555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 240K Jan 22 08:53 gen-word-155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 37K Jan 22 08:53 gen-word-15-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 250K Jan 22 08:53 gen-word-160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 08:53 gen-word-1610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 254K Jan 22 08:53 gen-word-165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-1675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 262K Jan 22 08:53 gen-word-170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-1740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 
gen-word-1745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 266K Jan 22 08:53 gen-word-175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 279K Jan 22 08:53 gen-word-180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-1810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 284K Jan 22 08:53 gen-word-185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-1885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 290K Jan 22 08:53 gen-word-190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1940-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-1950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-1955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 300K Jan 22 08:53 gen-word-195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-1960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-1965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-1970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-1975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-1980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-1985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-1990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-1995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-2005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 308K Jan 22 08:53 gen-word-200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 08:53 gen-word-2010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 311K Jan 22 08:53 gen-word-205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 08:53 gen-word-2095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 44K Jan 22 08:53 gen-word-20-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 323K Jan 22 08:53 gen-word-210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
2.6M Jan 22 08:53 gen-word-2135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 330K Jan 22 08:53 gen-word-215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 08:53 gen-word-2180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 337K Jan 22 08:53 gen-word-220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 340K Jan 22 08:53 gen-word-225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 08:53 gen-word-2260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 353K Jan 22 08:53 gen-word-230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 
gen-word-2330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 08:53 gen-word-2355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 356K Jan 22 08:53 gen-word-235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 366K Jan 22 08:53 gen-word-240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-2430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 370K Jan 22 08:53 gen-word-245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 379K Jan 22 08:53 gen-word-250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-2520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2525-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 386K Jan 22 08:53 gen-word-255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-2595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 51K Jan 22 08:53 gen-word-25-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 392K Jan 22 08:53 gen-word-260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 404K Jan 22 08:53 gen-word-265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-2690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 406K Jan 22 08:53 gen-word-270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
3.3M Jan 22 08:53 gen-word-2720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 418K Jan 22 08:53 gen-word-275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 08:53 gen-word-2770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 422K Jan 22 08:53 gen-word-280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 08:53 gen-word-2855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 426K Jan 22 08:53 gen-word-285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 443K Jan 22 08:53 gen-word-290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 
gen-word-2915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 08:53 gen-word-2945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 438K Jan 22 08:53 gen-word-295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-2995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 08:53 gen-word-3000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-3005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 452K Jan 22 08:53 gen-word-300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-3010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-3015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-3020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-3025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-3030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-3035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-3045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 08:53 gen-word-3055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 457K Jan 22 08:53 gen-word-305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 57K Jan 22 08:53 gen-word-30-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 470K Jan 22 08:53 gen-word-310-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 476K Jan 22 08:53 gen-word-315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 08:53 gen-word-3160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 478K Jan 22 08:53 gen-word-320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 08:53 gen-word-3250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 490K Jan 22 08:53 gen-word-325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 08:53 gen-word-3305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
499K Jan 22 08:53 gen-word-330-count.jsonl\n",
- [... elided for brevity: the remainder of the previous build's `ls -l` listing, dated Jan 22 08:53, covering gen-word-*-count.jsonl for 5 to 4000 words (roughly 23K to 3.9M each) and shuffle-word-*-count.jsonl for 10 to 1565 words (roughly 2.6M to 5.1M each), one file per 5-word step ...]
+ "total 6.2G\n",
+ "drwxrwxr-x 2 recursal recursal 132K Jan 22 23:49 .\n",
+ "drwxrwxr-x 5 recursal recursal 4.0K Jan 22 23:09 ..\n",
+ [... elided for brevity: the regenerated `ls -l` listing, dated Jan 22 23:49, covering gen-word-*-count.jsonl for 10 to 3005 words, one file per 5-word step, with sizes growing alongside word count from roughly 55K to 2.9M ...]
+ "-rw-rw-r-- 1 recursal recursal 
152K Jan 22 23:49 gen-word-300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3010-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3015-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3020-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3025-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3030-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3035-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3040-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3045-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3050-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3055-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 153K Jan 22 23:49 gen-word-305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3060-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3065-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3070-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3075-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3080-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3085-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3090-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3095-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 198K Jan 22 23:49 gen-word-30-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3100-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3105-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 159K Jan 22 23:49 gen-word-310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3110-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3115-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3120-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3125-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3130-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3135-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3140-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3145-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3150-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3155-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 157K Jan 22 23:49 gen-word-315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3160-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3165-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3170-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3175-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3180-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3185-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3190-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3195-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 
gen-word-3200-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3205-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 158K Jan 22 23:49 gen-word-320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3210-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3215-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3220-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3225-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3230-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3235-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3240-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3245-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3250-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3255-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 164K Jan 22 23:49 gen-word-325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3260-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3265-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3270-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3275-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3280-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3285-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3290-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3295-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 164K Jan 22 23:49 gen-word-330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 167K Jan 22 23:49 gen-word-335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3385-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3395-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3405-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 172K Jan 22 23:49 gen-word-340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 176K Jan 22 23:49 gen-word-345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3505-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 174K Jan 22 23:49 gen-word-350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3510-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3515-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3520-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3525-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3530-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3535-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3540-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3545-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3550-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3555-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 179K Jan 22 23:49 gen-word-355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3560-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3565-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3570-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3575-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3580-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3585-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3590-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 
3.4M Jan 22 23:49 gen-word-3595-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 217K Jan 22 23:49 gen-word-35-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3600-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3605-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 179K Jan 22 23:49 gen-word-360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3610-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3615-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3620-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3625-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3630-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3635-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3640-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3645-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3650-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3655-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 182K Jan 22 23:49 gen-word-365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3660-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3665-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3670-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3675-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3680-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3685-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3690-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3695-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3700-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3705-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 182K Jan 22 23:49 gen-word-370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3710-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3715-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3720-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3725-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3730-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3735-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3740-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3745-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3750-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3755-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 187K Jan 22 23:49 gen-word-375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3760-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3765-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3770-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3775-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3780-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 
gen-word-3785-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3790-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3795-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3800-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3805-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 188K Jan 22 23:49 gen-word-380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3810-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3815-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3820-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3825-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3830-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3835-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3840-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3845-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3850-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3855-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 190K Jan 22 23:49 gen-word-385-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3860-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3865-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3870-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3875-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3880-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3885-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3890-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3895-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3900-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3905-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 196K Jan 22 23:49 gen-word-390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3910-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3915-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3920-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3925-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3930-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3935-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3940-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3945-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3950-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3955-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 194K Jan 22 23:49 gen-word-395-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3960-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3965-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3970-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3975-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3980-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3985-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3990-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-3995-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4000-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4005-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 200K Jan 22 23:49 gen-word-400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4010-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4015-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4020-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4025-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4030-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4035-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4040-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4045-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4050-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4055-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 202K Jan 22 23:49 gen-word-405-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4060-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4065-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4070-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4075-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4080-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4085-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4090-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4095-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 243K Jan 22 23:49 gen-word-40-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4100-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4105-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 202K Jan 22 23:49 gen-word-410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4110-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4115-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4120-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4125-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4130-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4135-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4140-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4145-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4150-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4155-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 208K Jan 22 23:49 gen-word-415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4160-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4165-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4170-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 
4.0M Jan 22 23:49 gen-word-4175-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4180-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4185-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4190-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4195-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4200-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4205-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 212K Jan 22 23:49 gen-word-420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4210-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4215-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4220-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4225-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4230-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4235-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4240-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4245-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4250-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4255-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 211K Jan 22 23:49 gen-word-425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4260-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4265-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4270-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4275-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4280-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4285-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4290-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4295-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 214K Jan 22 23:49 gen-word-430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 215K Jan 22 23:49 gen-word-435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 
gen-word-4370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4385-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4395-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4405-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 223K Jan 22 23:49 gen-word-440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 222K Jan 22 23:49 gen-word-445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4505-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 231K Jan 22 23:49 gen-word-450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4510-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4515-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4520-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4525-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4530-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4535-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4540-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4545-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4550-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4555-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 227K Jan 22 23:49 gen-word-455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4560-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4565-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4570-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4575-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4580-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4585-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4590-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4595-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 268K Jan 22 23:49 gen-word-45-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4600-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4605-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 226K Jan 22 23:49 gen-word-460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4610-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4615-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4620-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4625-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4630-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4635-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4640-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4645-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4650-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4655-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 232K Jan 22 23:49 gen-word-465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4660-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4665-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4670-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4675-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4680-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4685-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4690-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4695-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4700-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4705-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 232K Jan 22 23:49 gen-word-470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4710-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4715-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4720-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4725-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4730-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4735-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4740-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4745-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4750-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4755-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 235K Jan 22 23:49 gen-word-475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 
4.6M Jan 22 23:49 gen-word-4760-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4765-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4770-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4775-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4780-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4785-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4790-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4795-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4800-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4805-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 247K Jan 22 23:49 gen-word-480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4810-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4815-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4820-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4825-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4830-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4835-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4840-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4845-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4850-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4855-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 240K Jan 22 23:49 gen-word-485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4860-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4865-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4870-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4875-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4880-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4885-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4890-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4895-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4900-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4905-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 243K Jan 22 23:49 gen-word-490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4910-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4915-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4920-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4925-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4930-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4935-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4940-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4945-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4950-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4955-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 248K Jan 22 23:49 
gen-word-495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4960-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4965-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4970-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4975-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4980-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4985-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4990-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4995-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5000-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5005-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 249K Jan 22 23:49 gen-word-500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5010-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5015-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5020-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5025-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5030-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5035-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5040-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5045-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5050-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5055-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 253K Jan 22 23:49 gen-word-505-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5060-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5065-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5070-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5075-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5080-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5085-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5090-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5095-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 292K Jan 22 23:49 gen-word-50-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5100-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5105-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 252K Jan 22 23:49 gen-word-510-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5110-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5115-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5120-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5125-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5130-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5135-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5140-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5145-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5150-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5155-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 258K Jan 22 23:49 gen-word-515-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5160-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5165-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5170-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5175-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5180-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5185-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5190-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5195-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5200-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5205-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 259K Jan 22 23:49 gen-word-520-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5210-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5215-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5220-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5225-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5230-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5235-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5240-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5245-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5250-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5255-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 263K Jan 22 23:49 gen-word-525-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5260-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5265-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5270-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5275-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5280-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5285-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5290-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5295-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 268K Jan 22 23:49 gen-word-530-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 
5.1M Jan 22 23:49 gen-word-5350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 265K Jan 22 23:49 gen-word-535-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5385-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5395-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5405-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 268K Jan 22 23:49 gen-word-540-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 272K Jan 22 23:49 gen-word-545-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5505-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 271K Jan 22 23:49 gen-word-550-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5510-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5515-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5520-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5525-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5530-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5535-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5540-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 
gen-word-5545-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5550-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5555-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 279K Jan 22 23:49 gen-word-555-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5560-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5565-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5570-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5575-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5580-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5585-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5590-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5595-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 314K Jan 22 23:49 gen-word-55-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5600-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5605-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 276K Jan 22 23:49 gen-word-560-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5610-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5615-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5620-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5625-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5630-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5635-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5640-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5645-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5650-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5655-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 280K Jan 22 23:49 gen-word-565-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5660-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5665-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5670-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5675-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5680-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5685-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5690-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5695-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5700-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5705-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 287K Jan 22 23:49 gen-word-570-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5710-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5715-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5720-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5725-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5730-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5735-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5740-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5745-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5750-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5755-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 283K Jan 22 23:49 gen-word-575-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5760-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5765-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5770-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5775-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5780-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5785-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5790-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5795-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5800-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5805-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 294K Jan 22 23:49 gen-word-580-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5810-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5815-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5820-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5825-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5830-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5835-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5840-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5845-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5850-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5855-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 288K Jan 22 23:49 gen-word-585-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5860-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5865-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5870-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5875-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5880-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5885-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5890-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5895-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5900-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5905-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 297K Jan 22 23:49 gen-word-590-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5910-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5915-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5920-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5925-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5930-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 
5.7M Jan 22 23:49 gen-word-5935-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5940-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5945-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5950-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5955-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 297K Jan 22 23:49 gen-word-595-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5960-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5965-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5970-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.8M Jan 22 23:49 gen-word-5975-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.8M Jan 22 23:49 gen-word-5980-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5985-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5990-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.8M Jan 22 23:49 gen-word-5995-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 74K Jan 22 23:49 gen-word-5-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 5.8M Jan 22 23:49 gen-word-6000-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 298K Jan 22 23:49 gen-word-600-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 298K Jan 22 23:49 gen-word-605-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 343K Jan 22 23:49 gen-word-60-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 307K Jan 22 23:49 gen-word-610-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 301K Jan 22 23:49 gen-word-615-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 301K Jan 22 23:49 gen-word-620-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 306K Jan 22 23:49 gen-word-625-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 314K Jan 22 23:49 gen-word-630-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 321K Jan 22 23:49 gen-word-635-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 314K Jan 22 23:49 gen-word-640-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 311K Jan 22 23:49 gen-word-645-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 329K Jan 22 23:49 gen-word-650-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 323K Jan 22 23:49 gen-word-655-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 361K Jan 22 23:49 gen-word-65-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 328K Jan 22 23:49 gen-word-660-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 324K Jan 22 23:49 gen-word-665-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 326K Jan 22 23:49 gen-word-670-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 334K Jan 22 23:49 gen-word-675-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 337K Jan 22 23:49 gen-word-680-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 339K Jan 22 23:49 gen-word-685-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 341K Jan 22 23:49 gen-word-690-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 341K Jan 22 23:49 gen-word-695-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 349K Jan 22 23:49 gen-word-700-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 342K Jan 22 23:49 gen-word-705-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 389K Jan 22 23:49 gen-word-70-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 355K Jan 22 23:49 gen-word-710-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 348K Jan 22 23:49 gen-word-715-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 357K Jan 22 23:49 gen-word-720-count.jsonl\n", + "-rw-rw-r-- 1 
recursal recursal 360K Jan 22 23:49 gen-word-725-count.jsonl\n", + "... [output truncated: several hundred further -rw-rw-r-- entries in the same format, covering the remaining gen-word-*-count.jsonl shards (roughly 340K-500K each) and the shuffle-word-*-count.jsonl shards from shuffle-word-10 through shuffle-word-4085 (mostly 1.3M-2.6M each; the low word-count shards such as shuffle-word-10-count.jsonl run 14M-26M), all dated Jan 22 23:49] ...\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-4090-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4095-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 16M Jan 22 23:49 shuffle-word-40-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4100-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4105-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4110-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4115-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4120-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4125-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4130-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4135-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4140-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4145-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4150-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4155-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4160-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4165-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4170-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4175-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4180-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4185-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4190-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4195-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4200-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4205-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4210-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4215-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4220-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4225-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4230-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4235-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4240-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4245-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4250-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4255-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4260-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4265-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4270-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4275-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4280-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4285-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4290-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4295-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4385-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4395-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4405-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 
2.5M Jan 22 23:49 shuffle-word-4460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4505-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4510-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4515-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4520-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4525-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4530-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4535-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4540-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4545-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4550-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4555-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4560-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4565-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4570-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4575-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4580-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4585-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4590-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4595-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 16M Jan 22 23:49 shuffle-word-45-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4600-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4605-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4610-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4615-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4620-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4625-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4630-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4635-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4640-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-4645-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4650-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4655-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4660-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4665-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4670-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4675-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4680-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4685-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4690-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4695-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4700-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4705-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4710-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4715-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4720-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4725-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4730-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4735-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4740-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4745-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4750-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4755-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4760-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4765-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4770-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4775-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4780-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4785-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4790-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4795-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4800-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4805-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4810-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4815-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4820-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4825-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-4830-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4835-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4840-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4845-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4850-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4855-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4860-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4865-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4870-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4875-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4880-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4885-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4890-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4895-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4900-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4905-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4910-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4915-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4920-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4925-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4930-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4935-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4940-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4945-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4950-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4955-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4960-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4965-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4970-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4975-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4980-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4985-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4990-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4995-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5000-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5005-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5010-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-5015-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5020-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5025-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5030-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5035-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5040-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5045-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5050-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5055-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-505-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5060-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5065-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5070-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5075-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5080-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5085-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5090-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5095-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-50-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5100-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5105-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-510-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5110-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5115-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5120-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5125-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5130-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5135-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5140-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5145-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5150-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5155-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-515-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5160-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5165-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5170-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5175-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5180-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5185-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5190-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5195-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5200-count.jsonl\n", 
+ "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5205-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-520-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5210-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5215-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5220-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5225-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5230-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5235-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5240-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5245-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5250-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5255-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-525-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5260-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5265-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5270-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5275-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5280-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5285-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5290-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5295-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5300-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5305-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-530-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5310-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5315-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5320-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5325-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5330-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5335-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5340-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5345-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5350-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5355-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-535-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5360-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5365-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5370-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5375-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5380-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5385-count.jsonl\n", + "-rw-rw-r-- 1 recursal 
recursal 2.5M Jan 22 23:49 shuffle-word-5390-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5395-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5400-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5405-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-540-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5410-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5415-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5420-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5425-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5430-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5435-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5440-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5445-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5450-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5455-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-545-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5460-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5465-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5470-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5475-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5480-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5485-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5490-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5495-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5500-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5505-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-550-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5510-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5515-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5520-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5525-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5530-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5535-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5540-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5545-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5550-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5555-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-555-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5560-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5565-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5570-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-5575-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5580-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5585-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5590-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5595-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-55-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5600-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5605-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-560-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5610-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5615-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5620-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5625-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5630-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5635-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5640-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5645-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5650-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5655-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-565-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5660-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5665-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5670-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5675-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5680-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5685-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5690-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5695-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5700-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5705-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-570-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5710-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5715-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5720-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5725-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5730-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5735-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5740-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5745-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5750-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5755-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-575-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5760-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5765-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5770-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5775-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5780-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5785-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5790-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5795-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5800-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5805-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-580-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5810-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5815-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5820-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5825-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5830-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5835-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5840-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5845-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5850-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5855-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-585-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5860-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5865-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5870-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5875-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5880-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5885-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5890-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5895-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5900-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5905-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-590-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5910-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5915-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5920-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5925-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5930-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5935-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5940-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5945-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 
2.5M Jan 22 23:49 shuffle-word-5950-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5955-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-595-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5960-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5965-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5970-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5975-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5980-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5985-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5990-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5995-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 40M Jan 22 23:49 shuffle-word-5-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-6000-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-600-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-605-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-60-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-610-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-615-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-620-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-625-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-630-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-635-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-640-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-645-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-650-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-655-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-65-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-660-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-665-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-670-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-675-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-680-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-685-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-690-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-695-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-700-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-705-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-70-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-710-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-715-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-720-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-725-count.jsonl\n", + "-rw-rw-r-- 1 
recursal recursal 1.3M Jan 22 23:49 shuffle-word-730-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-735-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-740-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-745-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-750-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-755-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-75-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-760-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-765-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-770-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-775-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-780-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-785-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-790-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-795-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-800-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-805-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-80-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-810-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-815-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-820-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-825-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-830-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-835-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-840-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-845-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-850-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-855-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-85-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-860-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-865-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-870-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-875-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-880-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-885-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-890-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-895-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-900-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-905-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 14M Jan 22 23:49 shuffle-word-90-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-910-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-915-count.jsonl\n", + 
"-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-920-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-925-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-930-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-935-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-940-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-945-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-950-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-955-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 14M Jan 22 23:49 shuffle-word-95-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-960-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-965-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-970-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-975-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-980-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-985-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-990-count.jsonl\n", + "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-995-count.jsonl\n" ] } ], @@ -4620,27 +6901,29 @@ "echo \"## Generating word reptition dataset ##\"\n", "\n", "#\n", - "# Training set for < 2000 words\n", + "# Training set for < 100 words\n", + "# We bump this aggressively, as its used to fill in packing\n", "#\n", - "for i in {5..2000..5} \n", + "for i in {5..100..5} \n", "do\n", - " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 150 & \n", - " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 500 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 500 & \n", "done\n", "\n", "#\n", - "# Ramping up the 50+ - 400 words dataset\n", + "# Ramping up the 50+ - 1500 words dataset\n", + "# This is to ensure there is ramp from the previous models\n", "# \n", - "for i in {2005..3000..5} \n", + "for i in {105..1500..5} \n", "do\n", - " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 125 & \n", - " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 50 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 50 & \n", "done\n", "\n", "#\n", - "# Ramping up the 50+ - 400 words dataset\n", + "# Ramping up the 1500+ - 6000 words dataset\n", "# \n", - "for i in {3005..4000..5} \n", + "for i in {1505..6000..5} \n", "do\n", " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", @@ -4651,13 +6934,316 @@ "\n", "ls -alh ./dataset/" ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "Resolving data files: 100%|█████████████| 2400/2400 [00:00<00:00, 365795.62it/s]\n", + "Generating train split: 1258813 examples [00:16, 78115.78 examples/s] \n", + "Map (num_proc=160): 100%|███| 1258813/1258813 [01:30<00:00, 13918.46 examples/s]\n", + "Filter (num_proc=160): 100%|█| 1258813/1258813 [00:49<00:00, 25674.24 examples/s\n", + "Map (num_proc=160): 100%|██| 1238639/1238639 [00:06<00:00, 189798.96 examples/s]\n", + "Map (num_proc=160): 100%|███| 1238639/1238639 [01:09<00:00, 17869.03 examples/s]\n", + "Map (num_proc=160): 100%|██████| 127252/127252 [00:15<00:00, 8260.92 examples/s]\n", + "Saving the dataset (11/11 shards): 100%|█| 127252/127252 [00:05<00:00, 21357.11 \n", + "Saving the dataset (1/1 shards): 100%|█| 6225/6225 [00:00<00:00, 62482.00 exampl\n" + ] + } + ], + "source": [ + "# Lets pre tokenize the requried dataset\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-23 00:03:23,061] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8x4090] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=2048, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=2048'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8x4090] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=2048, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=2048'].\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 3160641161\n", + "Seed set to 3160641161\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 256\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 4\n", + " - effective_batch_size: 256\n", + "\n", + "[rank: 0] Seed set to 3160641161\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-23 00:03:40,928] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 00:03:41,167] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 00:03:41,174] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 00:03:41,174] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 00:03:41,177] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 00:03:41,194] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 00:03:41,253] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", + "[rank: 7] Seed set to 3160641161\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 2] Seed set to 3160641161\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 4] Seed set to 3160641161\n", + "[rank: 5] Seed set to 3160641161\n", + "[rank: 1] Seed set to 3160641161\n", + "[rank: 3] Seed set to 3160641161\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 6] Seed set to 3160641161\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 7] Seed set to 3160641161\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 4] Seed set to 3160641161\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 2] Seed set to 3160641161\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 6] Seed set to 3160641161\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 3] Seed set to 3160641161\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 1] Seed set to 3160641161\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 5] Seed set to 3160641161\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240123_000416-1quitpef\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8x4090] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=2048, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/1quitpef\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 4.000e-04 (0.0004)\n", + " - lr_final: 3.000e-04 (0.0003)\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.06246376037597656 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10220813751220703 seconds\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10210061073303223 seconds\n", + "Time to load fused_adam op: 0.1024923324584961 seconds\n", + "Time to load fused_adam op: 0.10265374183654785 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10258603096008301 seconds\n", + "Time to load fused_adam op: 0.10260725021362305 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.1026144027709961 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "Epoch 0: 5%| | 100/1989 [06:48<2:08:42, 0.24it/s, v_num=tpef, train/loss=4.56/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 62%|▌| 1235/1989 [1:22:32<50:23, 0.25it/s, v_num=tpef, train/loss=7.7" + ] + } + ], + "source": [ + "## Finetune 2 (2k -> 8k) : The actual tune!\n", + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-2-tune.yaml\" \\\n", + " --model.load_model=\"../model/Memory-Tune-Stage-1-{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-2 (bs=256, train-ctx=2048, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=8 \\\n", + " --model.ctx_len=2048" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:rwkv-infctx]", + "display_name": "rwkv-infctx", "language": "python", - "name": "conda-env-rwkv-infctx-py" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml index dca4c7ce..d9928b1d 100644 --- a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml @@ -252,7 +252,8 @@ model: # ---------------------------- # Skip tokens that are already learnt, and are below the target threshold - token_loss_threshold: 0.01 + token_loss_threshold: 0.02 + # Perform token based drop out at random, to the target % rate # token_dropout_rate: 0.0 @@ -290,7 +291,7 @@ data: # After loading the dataset, split out test data used for validation, # This process is skipped if the dataset includes a test split # This process is skipped if set to zero - test_split: 0.01 + test_split: 0.005 test_split_shuffle: true # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer @@ -304,7 +305,7 @@ data: # # This is ignored, if set to -1 min_token_size: -1 - max_token_size: 2048 + max_token_size: 4096 # Rechunking of text dataset, this is done only when source is set as 'text' # and will merge the various sentencees, into larger chunks up to the target size @@ -384,7 +385,7 @@ data: # Minimum size to pack up to, this should be a multiple of packing_chunksize # defautls to -1, which equals to packing_chunksize - packing_min_ctx_len: -1 + packing_min_ctx_len: 8192 # Pack the data sequentially if possible, in accordance to the dataset sequence # this can be used together with sort_by_length @@ -401,7 +402,7 @@ data: # Reverse the training dataset order before saving, this is useful for, # optimizing dataset packing process, when using packing_in_sequence # and sort_by_length desc order together - reverse_train_dataset_before_save: True + reverse_train_dataset_before_save: true # Path to the current checkpoint to continue training from # Enable this to the last checkpoint after the first run diff --git 
a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml
new file mode 100644
index 00000000..78357dcc
--- /dev/null
+++ b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml
@@ -0,0 +1,411 @@
+# lightning.pytorch==2.0.2
+seed_everything: true
+trainer:
+  # Configure the number of GPUs available on your machine
+  accelerator: gpu
+  devices: auto
+  num_nodes: 1
+
+  #
+  # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload`
+  # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful
+  # for training LoRA on large models on a single GPU.
+  #
+  # In general you would want to use the following:
+  #
+  # - deepspeed_stage_1 : Each of your GPUs has too much vram, and you do not know what to do
+  #
+  # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple GPUs each with sufficient vram
+  # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu
+  #
+  # - deepspeed_stage_3 : Split up the model across multiple GPUs, useful for large models, at a performance cost
+  # - deepspeed_stage_3_offload : Additional offloading, for even greater performance cost
+  #
+  # For more details see:
+  # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2
+  #
+  strategy: deepspeed_stage_1
+
+  # Floating point precision for the model, because RWKV is built FOR bf16
+  # you should pretty much never change this setting
+  precision: bf16
+
+  # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section
+  # ---
+  logger:
+    class_path: lightning.pytorch.loggers.WandbLogger
+    init_args:
+      name: 'stage-2-memory-finetune-1 (bs=256, train-ctx=512)'
+      # name: 'Echo-B-1B4 Foundation'
+      project: 'RWKV-Memory-Experiment'
+      tags: ['RWKV', 'memory-exp']
+      id: null
+      save_dir: .
+      version: null
+      offline: false
+      dir: null
+      anonymous: null
+      log_model: false
+      experiment: null
+      prefix: ''
+      checkpoint_name: null
+      job_type: null
+      config: null
+      entity: null
+      reinit: null
+      group: null
+      notes: null
+      magic: null
+      config_exclude_keys: null
+      config_include_keys: null
+      mode: null
+      allow_val_change: null
+      resume: null
+      force: null
+      tensorboard: null
+      sync_tensorboard: null
+      monitor_gym: null
+      save_code: null
+      settings: null
+
+  # Checkpoint settings for the training process
+  callbacks:
+    class_path: lightning.pytorch.callbacks.ModelCheckpoint
+    init_args:
+      # Configure this to the path you want to save your checkpoints to
+      # note that a subdir will be created with the name `epoch=x-step=y.ckpt`
+      #
+      # to convert a checkpoint to a model, you can use the
+      # `python3 export_checkpoint.py ` script,
+      # which will create a `rwkv_model.pth` in the checkpoint directory.
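+      #
+      # A rough example invocation (the checkpoint path here is hypothetical,
+      # substitute the dirpath / checkpoint folder of your own run):
+      #
+      #   python3 export_checkpoint.py ../checkpoint/v5-exp/memory-test/stage-2-memory-finetune/last.ckpt/
+      #   # -> produces a `rwkv_model.pth` inside that checkpoint directory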
+      #
+      # Do not use the `zero_to_fp32.py` script as that will have export format issues
+      dirpath: ../checkpoint/v5-exp/memory-test/stage-2-memory-finetune/
+      filename: null
+
+      # Save the top/last K checkpoints
+      save_top_k: 2
+      # Choose by the most recent checkpoints (time based)
+      monitor: 'step'
+      mode: max
+
+      # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt'
+      # useful to simplify checkpoint resume scripts, at a price of disk performance
+      save_last: true
+
+      # DO NOT set this as true, as the model weight exported will have format issues
+      # export as checkpoint, and use the `export_checkpoint.py` script to convert to model instead
+      save_weights_only: false
+
+      # How frequently you want to save a checkpoint for every step.
+      # This will happen for every X data samples, where X = every_n_train_steps * accumulate_grad_batches
+      #
+      # In general you will want to avoid putting a low number (especially if accumulate_grad_batches <= 100)
+      # as the checkpoint process will pause all the gpu training for some time, slowing down the overall process
+      # However you do not want to configure too high of a number, where you will lose too much progress if the training crashes
+      every_n_train_steps: 25
+      every_n_epochs: null
+      save_on_train_epoch_end: true
+      train_time_interval: null
+
+      # Other settings, you can probably leave alone
+      verbose: false
+      auto_insert_metric_name: true
+
+  ########################################
+  ## Training run parameter settings
+  ########################################
+
+  # Generally what you want to configure is the maximum number of epochs
+  # Leave it as -1, and it will keep going forever till interrupted
+  # Or set it as a number, and it will stop after that number of epochs
+  max_epochs: 1
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  max_time: null
+
+  # Number of datasamples to train for each step, a data sample is considered
+  # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step"
+  #
+  # This decides the number of datasamples to learn together from, before backpropagating
+  # any weight changes at the end of the batch.
+  #
+  # Recommended to be a big enough number (like 128/256) where it prevents the training
+  # loss from fluctuating in the process. But not too big of a number where the increased
+  # GPU vRAM / offloaded RAM usage will cause the training to crash.
+  #
+  # You are also recommended to configure this to a large enough number to fully utilize
+  # your GPU processing time %, and avoid idle time for the GPU between batches
+  target_batch_size: 256
+
+  # # Number of datasamples to accumulate before backpropagating, per GPU
+  # # this can't be used with target_batch_size.
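+  # # (For reference: with this file's target_batch_size of 256, on the
+  # #  8 GPU, microbatch_size 8 run shown in the logs above, the trainer
+  # #  derived accumulate_grad_batches = 256 / (8 * 8) = 4)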
+  # accumulate_grad_batches: -1
+
+  # Various other settings, you probably want to leave alone
+  fast_dev_run: false
+  limit_train_batches: null
+  limit_val_batches: null
+  limit_test_batches: null
+  limit_predict_batches: null
+  overfit_batches: 0.0
+  val_check_interval: null
+  check_val_every_n_epoch: 1
+  num_sanity_val_steps: 0
+  log_every_n_steps: 1
+  enable_checkpointing: null
+  enable_progress_bar: null
+  enable_model_summary: null
+  gradient_clip_val: 1.0
+  gradient_clip_algorithm: null
+  deterministic: null
+  benchmark: null
+  inference_mode: true
+  use_distributed_sampler: true
+  profiler: null
+  detect_anomaly: false
+  barebones: false
+  plugins: null
+  sync_batchnorm: false
+  reload_dataloaders_every_n_epochs: 0
+  default_root_dir: null

########################################
## Training model settings
########################################
model:
+  # Model to start the finetune/training process from
+  load_model: ../model/Echo-B-1B4-Stage2.pth
+
+  # Context length to use for the training process
+  # the larger the number (and batch size) the larger the vram usage
+  #
+  # Note that if the datasample context length is larger than the ctx_len
+  # its training process would be split into ctx_len sized chunks.
+  #
+  # This allows the training of extremely large context lengths (eg. 100k),
+  # without eating up too much vram by keeping the training context length
+  # to a reasonable number suitable to the current GPU setup
+  ctx_len: 2048
+
+  # Learning rate of the training process
+  # ---
+  # Initial learning rate of the process
+  lr_init: 4e-4
+  # Final learning rate after the learning rate period
+  # learning rate will stay at final value from then onwards
+  #
+  # NOTE: lr_final / lr_period does not work with warmup_steps
+  # and will be ignored (or replaced) with the warmup_steps logic instead
+  lr_final: 3e-4
+
+  # Number of epochs to reduce the learning rate from lr_init to lr_final
+  # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards)
+  # 0 means lr_final will apply immediately
+  # -1 means we take the current max_step / max_epoch as the period
+  lr_period: 1
+  # lr_period type if it's set, defaults to epoch
+  lr_period_type: epoch
+
+  # Adam optimizer settings
+  # You probably want to leave this alone, unless you know what you are doing
+  beta1: 0.9
+  beta2: 0.99
+  adam_eps: 1.0e-08
+  weight_decay: 0.01
+
+  # Experimental cutoff settings
+  # ---
+  # Data samples would be cut down to the respective max ctx_len_cutoffs
+  # values if they are larger than ctx_len. If the data sample is larger than
+  # the largest len_cutoff, the remaining data will be discarded
+  #
+  # Leave it as a blank array to disable the feature
+  ctx_len_cutoffs: []
+  # Experimental settings, number of tokens to skip in the data sample
+  # prefix, for the respective cutoff length. Used to speed up the process
+  #
+  # Leave it as a blank array to disable the feature
+  ctx_len_warmup_steps: []
+
+  # torch.set_float32_matmul_precision, used to optimize operations with tensor cores
+  # this should be set as null, for non cuda core GPUs
+  torch_set_float32_matmul_precision: 'high'
+  # torch_set_float32_matmul_precision: null
+
+  # We limit bptt_learning_range to 1, to ensure high throughput within a multi-gpu setup
+  # (by skipping some synchronization code).
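+  # (As a rough illustration of the chunking described under ctx_len above:
+  #  an 8192 token packed sample at ctx_len 2048 would be processed as
+  #  8192 / 2048 = 4 sequential chunks, with bptt_learning_range 1 keeping
+  #  the backprop window to a single chunk range at a time)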
+  # Additionally, bptt learning should not be triggering anyway,
+  # as the data sample should be within ctx size 99% of the time
+  bptt_learning: true
+  bptt_learning_range: 1
+
+  # various other settings you probably should leave alone
+  grad_cp: true
+  warmup_steps: -1
+  layerwise_lr: true
+  dim_att: null
+  dim_ffn: null
+
+  # ----------------------------
+  # Selective loss training
+  # ----------------------------
+
+  # Skip tokens that are already learnt, and are below the target threshold
+  token_loss_threshold: 0.02
+
+  # Perform token based dropout at random, to the target % rate
+  # token_dropout_rate: 0.0
+
+data:
+  # Skip the datapath setup
+  # ignored if using the preload_datapath.py
+  skip_datapath_setup: True
+
+  # dataset_path for the prebuilt dataset, using HF `load_from_disk()`
+  #
+  # Use this if you have built your own dataset and saved it with `save_to_disk()`
+  # with source left as null. Otherwise configure this to a directory in which the
+  # dataset will be built and tokenized by the huggingface dataset process.
+  data_path: ../datapath/v5-exp/memory-test/stage-2/
+
+  # Otherwise provide the source path, which is used as huggingface dataset path
+  # this will be used to populate the dataset_path
+  #
+  # Use either the following
+  # - hugging face dataset
+  # - Directory path to a directory containing dataset files
+  # - Path to a single dataset file
+  # - hugging face dataset mode (ie: text,csv,etc - use data_dir, to configure the path then)
+  # - null
+  #
+  # If source is disabled, all other params, except data_path, are ignored
+  source: json
+  # source: text
+  # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt
+
+  # Use data_dir, if you are using source=text/json/etc
+  # this should be relative to the trainer script path
+  source_data_dir: ../notebook/rwkv-x-exp/v5-exp/memory-test/dataset
+
+  # After loading the dataset, split out test data used for validation,
+  # This process is skipped if the dataset includes a test split
+  # This process is skipped if set to zero
+  test_split: 0.005
+  test_split_shuffle: true
+
+  # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer
+  # If using a custom tokenizer, provide the tokenizer file path
+  # ---
+  tokenizer: world
+
+  # Minimum / Maximum token size of the dataset to use
+  # useful for filtering out small noisy data samples from large datasets
+  # (eg. removal of small articles of less than 512 tokens from wikipedia)
+  #
+  # This is ignored, if set to -1
+  min_token_size: -1
+  max_token_size: 8192
+
+  # Rechunking of text dataset, this is done only when source is set as 'text'
+  # and will merge the various sentences, into larger chunks up to the target size
+  #
+  # Defaults to 4096
+  #
+  # This is ignored, if source is not set as text
+  # This is ignored, if set to zero
+  # ---
+  # text_rechunk_size: 2048
+
+  # Apply text rechunk to the dataset, even if it's not a 'text' source
+  # This is done only after dataset filtering, and if source is not 'text'
+  # ---
+  # text_rechunk_force: false
+
+  # Custom text column to use, useful for datasets with alternative training column labels
+  # This is checked before multi column merging, default is null (disabled)
+  # eg: 'code'
+  # ---
+  # custom_text_key: 'code'
+
+  # Multi Column merging process, default setting is used to support and merge
+  # "instruction", "input", "output" datasets. To disable, set multi_column_keys to []
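+  #
+  # (For illustration, a hypothetical record such as
+  #  {"instruction": "...", "input": "...", "output": "..."}
+  #  has its columns concatenated in order, with the train mask deciding
+  #  which segments contribute to the loss - see the commented
+  #  multi_column_train_mask example below)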
+  #
+  # A minimum of 2 columns is required, with non-empty data, for the merge to occur
+  # If no match is found, this will fall back to the default prompt/completion or text column,
+  # or throw an error if the default fallback is not found
+  # ---
+  # multi_column_keys: ['input_prefix', 'input', 'output_prefix', 'output', 'closing']
+  # multi_column_prefix: ['', '', '', '', '']
+  # multi_column_train_mask: [true, false, true, true, true]
+  # multi_column_separator: ''
+
+  # If processing prompt/completion jsonl pairs, the prompt is masked by default
+  # use this flag to disable this default behaviour
+  # ---
+  # disable_prompt_mask: false
+
+  # ----------------------------
+  # Selective loss training
+  # ----------------------------
+
+  # Prefix token masking
+  #
+  # The rationale behind this, is that the first X tokens should not be "backpropped"
+  # for any new training record. As it's unfair to expect the model (or a human) to make
+  # any reasonable guesses at that stage. As such this is used to "mask" the first X tokens
+  # from the loss calculation, and thus not backpropped.
+  data_prefix_skip_mask: 0
+
+  # Additional source dataset params, used to grab subsets of the dataset
+  # ---
+  # source_dataset_params:
+  #   language: en
+
+  # ----------------------------
+  # Dataset packing support
+  # Recommended to be used with mixed document size finetuning
+  # For foundation model "from scratch", rechunking is typically used instead
+  # ----------------------------
+
+  # Boolean flag to enable / disable dataset packing
+  packing_enable: True
+
+  # Used to ensure all training samples within this batch size are the same length
+  # Ideally this should align exactly with your real "batch size"
+  #
+  # Uses `8 * (3 * 4 * 5 * 6 * 7) = 20160` by default, as it should align across
+  # a large number of batch size combinations. This helps reduce the amount of
+  # misaligned batches, and thus reduce the amount of wasted training time.
+  packing_batchsize: 256
+
+  # Chunking size to align within each batch, this ideally should be equal to
+  # the training context length used.
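+  # (Sanity check for this config: packing_chunksize below matches
+  #  model.ctx_len of 2048, and packing_min_ctx_len of 8192 is a clean
+  #  multiple, 8192 = 4 * 2048, so packed samples split evenly into chunks)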
+  packing_chunksize: 2048
+
+  # Minimum size to pack up to, this should be a multiple of packing_chunksize
+  # defaults to -1, which equals packing_chunksize
+  packing_min_ctx_len: 8192
+
+  # Pack the data sequentially if possible, in accordance with the dataset sequence
+  # this can be used together with sort_by_length
+  packing_in_sequence: true
+
+  # ----------------------------
+  # Sort before packing, and reverse before saving
+  # ----------------------------
+
+  # Sort the dataset by length, useful to reduce gpu waiting time (also useful for RWKV long context coherence)
+  sort_by_length: true
+  sort_asc: false # Sort in ascending order, true = shortest first, false = longest first
+
+  # Reverse the training dataset order before saving, this is useful for
+  # optimizing the dataset packing process, when using packing_in_sequence
+  # and sort_by_length desc order together
+  reverse_train_dataset_before_save: true
+
+# Path to the current checkpoint to continue training from
+# Enable this to the last checkpoint after the first run
+# (if it crashes and you want to resume)
+# ckpt_path: ../checkpoint/xyz/last.ckpt
+ckpt_path: null

From ebab809b83743eabc1a23eb5cb524d4708963d84 Mon Sep 17 00:00:00 2001
From: "Eugene Cheah (picocreator)"
Date: Tue, 23 Jan 2024 04:21:09 +0000
Subject: [PATCH 20/23] WIP returne

---
 .../rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb
index c7d9aa6f..45202ecc 100644
--- a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb
+++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb
@@ -7218,7 +7218,7 @@
     "  warnings.warn(\n",
     "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead.
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "Epoch 0: 62%|▌| 1235/1989 [1:22:32<50:23, 0.25it/s, v_num=tpef, train/loss=7.7" + "Epoch 0: 62%|▌| 1243/1989 [1:22:56<49:46, 0.25it/s, v_num=tpef, train/loss=7.7^C\n" ] } ], From 54c816a0b63aed9285c7ae0c31b07f21c1edb17d Mon Sep 17 00:00:00 2001 From: "Eugene Cheah (picocreator)" Date: Tue, 23 Jan 2024 19:52:03 +0000 Subject: [PATCH 21/23] 1B5 memory finetune --- .../memory-test/World-1B5-mem-finetune.ipynb | 9438 +++++------------ .../v5-exp/memory-test/stage-1-tune.yaml | 8 +- .../v5-exp/memory-test/stage-2-tune.yaml | 8 +- 3 files changed, 2664 insertions(+), 6790 deletions(-) diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb index 45202ecc..7bd2747b 100644 --- a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb @@ -3,7 +3,17 @@ { "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "id": "ecd2a3ff", + "metadata": { + "papermill": { + "duration": 0.003441, + "end_time": "2024-01-23T13:12:26.844416", + "exception": false, + "start_time": "2024-01-23T13:12:26.840975", + "status": "completed" + }, + "tags": [] + }, "source": [ "# RWKV World Memory Finetune (Memory Finetune)\n", "\n", @@ -17,14 +27,24 @@ "\n", "In practise however, the model may show \"attention range\" longer then what is benchmarked, as natural text is highly compressible. Unlike the pure randomized data that was being tested here.\n", "\n", - "This runner has been optimized to run on 8 x 24GB vram nodes, you should allocate atleast 500GB disk space.\n", + "This runner has been optimized to run on 8 x 80GB vram nodes, you should allocate atleast 1TB disk space.\n", "\n", "> This project assumes you have the rwkv-infctx conda env setup, and you are executing in that environment - see the main README.md for the conda env setup steps" ] }, { "cell_type": "markdown", - "metadata": {}, + "id": "5a185acc", + "metadata": { + "papermill": { + "duration": 0.00437, + "end_time": "2024-01-23T13:12:26.854086", + "exception": false, + "start_time": "2024-01-23T13:12:26.849716", + "status": "completed" + }, + "tags": [] + }, "source": [ "## Configure your environment settings\n", "(!Important: you will need to rerun the below cell, if you restart your kernel)" @@ -32,8 +52,24 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, + "execution_count": 1, + "id": "f8b3c0c6", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:26.859575Z", + "iopub.status.busy": "2024-01-23T13:12:26.859381Z", + "iopub.status.idle": "2024-01-23T13:12:26.871264Z", + "shell.execute_reply": "2024-01-23T13:12:26.870420Z" + }, + "papermill": { + "duration": 0.017126, + "end_time": "2024-01-23T13:12:26.873493", + "exception": false, + "start_time": "2024-01-23T13:12:26.856367", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -42,9 +78,9 @@ "DEEPSPEED_STRAT: deepspeed_stage_1\n", "ENABLE_WANDB: True\n", "GPU_DEVICES: auto\n", - "NOTEBOOK_DIR: /home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test\n", - "TRAINER_DIR: /home/recursal/RWKV-infctx-trainer/RWKV-v5\n", - "PROJECT_DIR: /home/recursal/RWKV-infctx-trainer\n" + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test\n", + "TRAINER_DIR: 
/workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" ] } ], @@ -52,7 +88,7 @@ "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", "GPU_DEVICES=\"auto\"\n", "ENABLE_WANDB=True\n", - "WANDB_PREFIX=\"[8x4090] RWKV-v5-1B5-World\"\n", + "WANDB_PREFIX=\"[8xA100] RWKV-v5-1B5-World\"\n", "\n", "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", @@ -81,7 +117,17 @@ }, { "cell_type": "markdown", - "metadata": {}, + "id": "6adf698d", + "metadata": { + "papermill": { + "duration": 0.002976, + "end_time": "2024-01-23T13:12:26.883554", + "exception": false, + "start_time": "2024-01-23T13:12:26.880578", + "status": "completed" + }, + "tags": [] + }, "source": [ "## Download the pretrained model\n", "(if you want to skip the the basemodel train + instruct tune)\n" @@ -89,17 +135,25 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File ‘RWKV-v5-1B5-world.pth’ already there; not retrieving.\n" - ] - } - ], + "execution_count": null, + "id": "7b64f1b0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:26.889176Z", + "iopub.status.busy": "2024-01-23T13:12:26.888991Z", + "iopub.status.idle": "2024-01-23T13:12:27.384385Z", + "shell.execute_reply": "2024-01-23T13:12:27.382982Z" + }, + "papermill": { + "duration": 0.501114, + "end_time": "2024-01-23T13:12:27.387069", + "exception": false, + "start_time": "2024-01-23T13:12:26.885955", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], "source": [ "# Lets wget the model files\n", "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n", @@ -110,42 +164,44 @@ { "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "id": "f73ffd44", + "metadata": { + "papermill": { + "duration": 0.005131, + "end_time": "2024-01-23T13:12:27.397590", + "exception": false, + "start_time": "2024-01-23T13:12:27.392459", + "status": "completed" + }, + "tags": [] + }, "source": [ - "## Finetune 1 (0 -> 2k) : Dataset preperation\n", + "## Finetune 1 (0 -> 4k) : Dataset preperation\n", "\n", "Stage 1, handles total context size of 2048. Meaning it will be tuned for memory task of 1 to approximately 1024 tokens of size." 
] }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: rwkv in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (0.8.22)\n", - "Requirement already satisfied: asyncio in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (3.4.3)\n", - "Requirement already satisfied: aiocsv in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (1.2.5)\n", - "Requirement already satisfied: aiofiles in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (23.2.1)\n", - "Requirement already satisfied: tokenizers>=0.13.2 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from rwkv) (0.15.0)\n", - "Requirement already satisfied: huggingface_hub<1.0,>=0.16.4 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from tokenizers>=0.13.2->rwkv) (0.20.2)\n", - "Requirement already satisfied: filelock in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (3.13.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (2023.10.0)\n", - "Requirement already satisfied: requests in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (2.31.0)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (4.66.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (6.0.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (4.9.0)\n", - "Requirement already satisfied: packaging>=20.9 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (23.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from requests->huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (2.0.4)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from requests->huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from requests->huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (1.26.18)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages (from requests->huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->rwkv) (2023.11.17)\n" - ] - } - ], + "execution_count": null, + "id": "14058c3c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:27.404298Z", + "iopub.status.busy": "2024-01-23T13:12:27.403896Z", + "iopub.status.idle": "2024-01-23T13:12:31.171512Z", + "shell.execute_reply": "2024-01-23T13:12:31.170138Z" + 
}, + "papermill": { + "duration": 3.774062, + "end_time": "2024-01-23T13:12:31.174157", + "exception": false, + "start_time": "2024-01-23T13:12:27.400095", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], "source": [ "# Folder and eval pip setup\n", "!cp -r \"{MEMORY_SCRIPT_DIR}/\" \"{NOTEBOOK_DIR}/\"\n", @@ -154,1224 +210,25 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "## Generating word reptition dataset ##\n", - "Generated JSONL file with - 5 max words, 500 samples - at ./dataset/gen-word-5-count.jsonl\n", - "Generated JSONL file with - 15 max words, 500 samples - at ./dataset/gen-word-15-count.jsonl\n", - "Generated JSONL file with - 2 max words, 300 samples - at ./dataset/word-2-count.jsonl\n", - "Generated JSONL file with - 10 max words, 500 samples - at ./dataset/gen-word-10-count.jsonl\n", - "Generated JSONL file with - 25 max words, 500 samples - at ./dataset/gen-word-25-count.jsonl\n", - "Generated JSONL file with - 35 max words, 500 samples - at ./dataset/gen-word-35-count.jsonl\n", - "Generated JSONL file with - 50 max words, 500 samples - at ./dataset/gen-word-50-count.jsonl\n", - "Generated JSONL file with - 125 max words, 125 samples - at ./dataset/gen-word-125-count.jsonl\n", - "Generated JSONL file with - 140 max words, 125 samples - at ./dataset/gen-word-140-count.jsonl\n", - "Generated JSONL file with - 65 max words, 500 samples - at ./dataset/gen-word-65-count.jsonl\n", - "Generated JSONL file with - 20 max words, 500 samples - at ./dataset/gen-word-20-count.jsonl\n", - "Generated JSONL file with - 105 max words, 125 samples - at ./dataset/gen-word-105-count.jsonl\n", - "Generated JSONL file with - 180 max words, 125 samples - at ./dataset/gen-word-180-count.jsonl\n", - "Generated JSONL file with - 4 max words, 1000 samples - at ./dataset/word-2-count.jsonl\n", - "Generated JSONL file with - 160 max words, 125 samples - at ./dataset/gen-word-160-count.jsonl\n", - "Generated JSONL file with - 165 max words, 125 samples - at ./dataset/gen-word-165-count.jsonl\n", - "Generated JSONL file with - 175 max words, 125 samples - at ./dataset/gen-word-175-count.jsonl\n", - "Generated JSONL file with - 220 max words, 100 samples - at ./dataset/gen-word-220-count.jsonl\n", - "Generated JSONL file with - 30 max words, 500 samples - at ./dataset/gen-word-30-count.jsonl\n", - "Generated JSONL file with - 40 max words, 500 samples - at ./dataset/gen-word-40-count.jsonl\n", - "Generated JSONL file with - 110 max words, 125 samples - at ./dataset/gen-word-110-count.jsonl\n", - "Generated JSONL file with - 55 max words, 500 samples - at ./dataset/gen-word-55-count.jsonl\n", - "Generated JSONL file with - 245 max words, 100 samples - at ./dataset/gen-word-245-count.jsonl\n", - "Generated JSONL file with - 130 max words, 125 samples - at ./dataset/gen-word-130-count.jsonl\n", - "Generated JSONL file with - 115 max words, 125 samples - at ./dataset/gen-word-115-count.jsonl\n", - "Generated JSONL file with - 45 max words, 500 samples - at ./dataset/gen-word-45-count.jsonl\n", - "Generated JSONL file with - 285 max words, 100 samples - at ./dataset/gen-word-285-count.jsonl\n", - "Generated JSONL file with - 120 max words, 125 samples - at ./dataset/gen-word-120-count.jsonl\n", - "Generated JSONL file with - 205 max words, 100 samples - at ./dataset/gen-word-205-count.jsonl\n", - "Generated JSONL file with - 150 max words, 125 samples - at 
./dataset/gen-word-150-count.jsonl\n", - "Generated JSONL file with - 170 max words, 125 samples - at ./dataset/gen-word-170-count.jsonl\n", - "Generated JSONL file with - 145 max words, 125 samples - at ./dataset/gen-word-145-count.jsonl\n", - "Generated JSONL file with - 70 max words, 500 samples - at ./dataset/gen-word-70-count.jsonl\n", - "Generated JSONL file with - 75 max words, 500 samples - at ./dataset/gen-word-75-count.jsonl\n", - "Generated JSONL file with - 135 max words, 125 samples - at ./dataset/gen-word-135-count.jsonl\n", - "Generated JSONL file with - 295 max words, 100 samples - at ./dataset/gen-word-295-count.jsonl\n", - "Generated JSONL file with - 155 max words, 125 samples - at ./dataset/gen-word-155-count.jsonl\n", - "Generated JSONL file with - 60 max words, 500 samples - at ./dataset/gen-word-60-count.jsonl\n", - "Generated JSONL file with - 360 max words, 100 samples - at ./dataset/gen-word-360-count.jsonl\n", - "Generated JSONL file with - 185 max words, 125 samples - at ./dataset/gen-word-185-count.jsonl\n", - "Generated JSONL file with - 85 max words, 500 samples - at ./dataset/gen-word-85-count.jsonl\n", - "Generated JSONL file with - 210 max words, 100 samples - at ./dataset/gen-word-210-count.jsonl\n", - "Generated JSONL file with - 190 max words, 125 samples - at ./dataset/gen-word-190-count.jsonl\n", - "Generated JSONL file with - 215 max words, 100 samples - at ./dataset/gen-word-215-count.jsonl\n", - "Generated JSONL file with - 200 max words, 125 samples - at ./dataset/gen-word-200-count.jsonl\n", - "Generated JSONL file with - 90 max words, 500 samples - at ./dataset/gen-word-90-count.jsonl\n", - "Generated JSONL file with - 235 max words, 100 samples - at ./dataset/gen-word-235-count.jsonl\n", - "Generated JSONL file with - 290 max words, 100 samples - at ./dataset/gen-word-290-count.jsonl\n", - "Generated JSONL file with - 80 max words, 500 samples - at ./dataset/gen-word-80-count.jsonl\n", - "Generated JSONL file with - 95 max words, 500 samples - at ./dataset/gen-word-95-count.jsonl\n", - "Generated JSONL file with - 305 max words, 100 samples - at ./dataset/gen-word-305-count.jsonl\n", - "Generated JSONL file with - 265 max words, 100 samples - at ./dataset/gen-word-265-count.jsonl\n", - "Generated JSONL file with - 250 max words, 100 samples - at ./dataset/gen-word-250-count.jsonl\n", - "Generated JSONL file with - 355 max words, 100 samples - at ./dataset/gen-word-355-count.jsonl\n", - "Generated JSONL file with - 340 max words, 100 samples - at ./dataset/gen-word-340-count.jsonl\n", - "Generated JSONL file with - 195 max words, 125 samples - at ./dataset/gen-word-195-count.jsonl\n", - "Generated JSONL file with - 225 max words, 100 samples - at ./dataset/gen-word-225-count.jsonl\n", - "Generated JSONL file with - 270 max words, 100 samples - at ./dataset/gen-word-270-count.jsonl\n", - "Generated JSONL file with - 365 max words, 100 samples - at ./dataset/gen-word-365-count.jsonl\n", - "Generated JSONL file with - 230 max words, 100 samples - at ./dataset/gen-word-230-count.jsonl\n", - "Generated JSONL file with - 255 max words, 100 samples - at ./dataset/gen-word-255-count.jsonl\n", - "Generated JSONL file with - 260 max words, 100 samples - at ./dataset/gen-word-260-count.jsonl\n", - "Generated JSONL file with - 100 max words, 500 samples - at ./dataset/gen-word-100-count.jsonl\n", - "Generated JSONL file with - 300 max words, 100 samples - at ./dataset/gen-word-300-count.jsonl\n", - "Generated JSONL file with - 240 max words, 100 samples - 
at ./dataset/gen-word-240-count.jsonl\n", - "Generated JSONL file with - 275 max words, 100 samples - at ./dataset/gen-word-275-count.jsonl\n", - "Generated JSONL file with - 280 max words, 100 samples - at ./dataset/gen-word-280-count.jsonl\n", - "Generated JSONL file with - 310 max words, 100 samples - at ./dataset/gen-word-310-count.jsonl\n", - "Generated JSONL file with - 315 max words, 100 samples - at ./dataset/gen-word-315-count.jsonl\n", - "Generated JSONL file with - 350 max words, 100 samples - at ./dataset/gen-word-350-count.jsonl\n", - "Generated JSONL file with - 370 max words, 100 samples - at ./dataset/gen-word-370-count.jsonl\n", - "Generated JSONL file with - 420 max words, 100 samples - at ./dataset/gen-word-420-count.jsonl\n", - "Generated JSONL file with - 650 max words, 100 samples - at ./dataset/gen-word-650-count.jsonl\n", - "Generated JSONL file with - 320 max words, 100 samples - at ./dataset/gen-word-320-count.jsonl\n", - "Generated JSONL file with - 455 max words, 100 samples - at ./dataset/gen-word-455-count.jsonl\n", - "Generated JSONL file with - 390 max words, 100 samples - at ./dataset/gen-word-390-count.jsonl\n", - "Generated JSONL file with - 400 max words, 100 samples - at ./dataset/gen-word-400-count.jsonl\n", - "Generated JSONL file with - 540 max words, 100 samples - at ./dataset/gen-word-540-count.jsonl\n", - "Generated JSONL file with - 395 max words, 100 samples - at ./dataset/gen-word-395-count.jsonl\n", - "Generated JSONL file with - 700 max words, 100 samples - at ./dataset/gen-word-700-count.jsonl\n", - "Generated JSONL file with - 645 max words, 100 samples - at ./dataset/gen-word-645-count.jsonl\n", - "Generated JSONL file with - 375 max words, 100 samples - at ./dataset/gen-word-375-count.jsonl\n", - "Generated JSONL file with - 380 max words, 100 samples - at ./dataset/gen-word-380-count.jsonl\n", - "Generated JSONL file with - 655 max words, 100 samples - at ./dataset/gen-word-655-count.jsonl\n", - "Generated JSONL file with - 485 max words, 100 samples - at ./dataset/gen-word-485-count.jsonl\n", - "Generated JSONL file with - 660 max words, 100 samples - at ./dataset/gen-word-660-count.jsonl\n", - "Generated JSONL file with - 675 max words, 100 samples - at ./dataset/gen-word-675-count.jsonl\n", - "Generated JSONL file with - 690 max words, 100 samples - at ./dataset/gen-word-690-count.jsonl\n", - "Generated JSONL file with - 560 max words, 100 samples - at ./dataset/gen-word-560-count.jsonl\n", - "Generated JSONL file with - 760 max words, 100 samples - at ./dataset/gen-word-760-count.jsonl\n", - "Generated JSONL file with - 605 max words, 100 samples - at ./dataset/gen-word-605-count.jsonl\n", - "Generated JSONL file with - 430 max words, 100 samples - at ./dataset/gen-word-430-count.jsonl\n", - "Generated JSONL file with - 550 max words, 100 samples - at ./dataset/gen-word-550-count.jsonl\n", - "Generated JSONL file with - 815 max words, 100 samples - at ./dataset/gen-word-815-count.jsonl\n", - "Generated JSONL file with - 715 max words, 100 samples - at ./dataset/gen-word-715-count.jsonl\n", - "Generated JSONL file with - 495 max words, 100 samples - at ./dataset/gen-word-495-count.jsonl\n", - "Generated JSONL file with - 795 max words, 100 samples - at ./dataset/gen-word-795-count.jsonl\n", - "Generated JSONL file with - 465 max words, 100 samples - at ./dataset/gen-word-465-count.jsonl\n", - "Generated JSONL file with - 840 max words, 100 samples - at ./dataset/gen-word-840-count.jsonl\n", - "Generated JSONL file with - 330 max 
words, 100 samples - at ./dataset/gen-word-330-count.jsonl\n",
- "Generated JSONL file with - 545 max words, 100 samples - at ./dataset/gen-word-545-count.jsonl\n",
- "Generated a single JSONL file with 1348 samples (100 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n",
[... several hundred similar generation-log lines: one "Generated JSONL file with - N max words, 100 samples" line per gen-word-N-count.jsonl file, and one "Generated a single JSONL file with M samples (100 token repeat) - N max words" line per shuffle-word-N-count.jsonl file, for N from 5 up to 1500 in steps of 5; shuffle-word sample counts shrink as N grows, from 55948 samples at 5 max words down to 200 samples at 1500 max words ...]
- "## Done ##\n",
- "total 1011M\n",
- "drwxrwxr-x 2 recursal recursal 84K Jan 22 20:29 .\n",
- "drwxrwxr-x 5 recursal recursal 4.0K Jan 22 18:29 ..\n",
- "-rw-rw-r-- 1 recursal recursal 973K Jan 22 20:29 gen-word-1000-count.jsonl\n",
[... ls -la listing of ./dataset: one entry per generated .jsonl file, sizes ranging from 99K (gen-word-10-count.jsonl) up to 1.3M (gen-word-1330-count.jsonl); the listing continues ...]
- 
"-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 173K Jan 22 20:29 gen-word-135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 20:29 gen-word-1370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 178K Jan 22 20:29 gen-word-140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 189K Jan 22 20:29 gen-word-145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 20:29 gen-word-1465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 20:29 gen-word-1500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 192K Jan 22 20:29 gen-word-150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 201K Jan 22 20:29 gen-word-155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 122K Jan 22 20:29 gen-word-15-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 205K Jan 22 20:29 gen-word-160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 216K Jan 22 20:29 gen-word-165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 216K Jan 22 20:29 gen-word-170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 225K 
Jan 22 20:29 gen-word-175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 229K Jan 22 20:29 gen-word-180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 234K Jan 22 20:29 gen-word-185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 251K Jan 22 20:29 gen-word-190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 251K Jan 22 20:29 gen-word-195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 252K Jan 22 20:29 gen-word-200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 207K Jan 22 20:29 gen-word-205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 149K Jan 22 20:29 gen-word-20-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 215K Jan 22 20:29 gen-word-210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 218K Jan 22 20:29 gen-word-215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 222K Jan 22 20:29 gen-word-220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 229K Jan 22 20:29 gen-word-225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 236K Jan 22 20:29 gen-word-230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 242K Jan 22 20:29 gen-word-235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 251K Jan 22 20:29 gen-word-240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 246K Jan 22 20:29 gen-word-245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 256K Jan 22 20:29 gen-word-250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 262K Jan 22 20:29 gen-word-255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 171K Jan 22 20:29 gen-word-25-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 262K Jan 22 20:29 gen-word-260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 267K Jan 22 20:29 gen-word-265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 277K Jan 22 20:29 gen-word-270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 276K Jan 22 20:29 gen-word-275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 275K Jan 22 20:29 gen-word-280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 289K Jan 22 20:29 gen-word-285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 290K Jan 22 20:29 gen-word-290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 296K Jan 22 20:29 gen-word-295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 307K Jan 22 20:29 gen-word-300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 307K Jan 22 20:29 gen-word-305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 196K Jan 22 20:29 gen-word-30-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 307K Jan 22 20:29 gen-word-310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 322K Jan 22 20:29 gen-word-315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 318K Jan 22 20:29 gen-word-320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 332K Jan 22 20:29 gen-word-325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 328K Jan 22 20:29 gen-word-330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 334K Jan 22 20:29 gen-word-335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 344K Jan 22 20:29 gen-word-340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 349K Jan 22 20:29 gen-word-345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 356K Jan 22 20:29 gen-word-350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 354K Jan 22 20:29 gen-word-355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 219K Jan 22 20:29 gen-word-35-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 351K Jan 22 20:29 gen-word-360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 364K Jan 22 20:29 gen-word-365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 372K Jan 22 20:29 gen-word-370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
375K Jan 22 20:29 gen-word-375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 387K Jan 22 20:29 gen-word-380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 386K Jan 22 20:29 gen-word-385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 386K Jan 22 20:29 gen-word-390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 399K Jan 22 20:29 gen-word-395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 395K Jan 22 20:29 gen-word-400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 404K Jan 22 20:29 gen-word-405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 241K Jan 22 20:29 gen-word-40-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 408K Jan 22 20:29 gen-word-410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 411K Jan 22 20:29 gen-word-415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 420K Jan 22 20:29 gen-word-420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 426K Jan 22 20:29 gen-word-425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 429K Jan 22 20:29 gen-word-430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 431K Jan 22 20:29 gen-word-435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 433K Jan 22 20:29 gen-word-440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 440K Jan 22 20:29 gen-word-445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 449K Jan 22 20:29 gen-word-450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 457K Jan 22 20:29 gen-word-455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 267K Jan 22 20:29 gen-word-45-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 464K Jan 22 20:29 gen-word-460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 456K Jan 22 20:29 gen-word-465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 470K Jan 22 20:29 gen-word-470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 466K Jan 22 20:29 gen-word-475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 480K Jan 22 20:29 gen-word-480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 477K Jan 22 20:29 gen-word-485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 486K Jan 22 20:29 gen-word-490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 483K Jan 22 20:29 gen-word-495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 499K Jan 22 20:29 gen-word-500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 494K Jan 22 20:29 gen-word-505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 295K Jan 22 20:29 gen-word-50-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 506K Jan 22 20:29 gen-word-510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 499K Jan 22 20:29 gen-word-515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 515K Jan 22 20:29 gen-word-520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 522K Jan 22 20:29 gen-word-525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 526K Jan 22 20:29 gen-word-530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 532K Jan 22 20:29 gen-word-535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 535K Jan 22 20:29 gen-word-540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 539K Jan 22 20:29 gen-word-545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 546K Jan 22 20:29 gen-word-550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 555K Jan 22 20:29 gen-word-555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 315K Jan 22 20:29 gen-word-55-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 557K Jan 22 20:29 gen-word-560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 558K Jan 22 20:29 gen-word-565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 566K Jan 22 20:29 gen-word-570-count.jsonl\n", - "-rw-rw-r-- 1 recursal 
recursal 567K Jan 22 20:29 gen-word-575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 569K Jan 22 20:29 gen-word-580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 586K Jan 22 20:29 gen-word-585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 583K Jan 22 20:29 gen-word-590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 594K Jan 22 20:29 gen-word-595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 72K Jan 22 20:29 gen-word-5-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 585K Jan 22 20:29 gen-word-600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 608K Jan 22 20:29 gen-word-605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 346K Jan 22 20:29 gen-word-60-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 596K Jan 22 20:29 gen-word-610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 612K Jan 22 20:29 gen-word-615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 610K Jan 22 20:29 gen-word-620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 623K Jan 22 20:29 gen-word-625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 628K Jan 22 20:29 gen-word-630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 631K Jan 22 20:29 gen-word-635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 636K Jan 22 20:29 gen-word-640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 633K Jan 22 20:29 gen-word-645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 644K Jan 22 20:29 gen-word-650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 639K Jan 22 20:29 gen-word-655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 369K Jan 22 20:29 gen-word-65-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 651K Jan 22 20:29 gen-word-660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 656K Jan 22 20:29 gen-word-665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 656K Jan 22 20:29 gen-word-670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 674K Jan 22 20:29 gen-word-675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 681K Jan 22 20:29 gen-word-680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 680K Jan 22 20:29 gen-word-685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 678K Jan 22 20:29 gen-word-690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 686K Jan 22 20:29 gen-word-695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 697K Jan 22 20:29 gen-word-700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 705K Jan 22 20:29 gen-word-705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 393K Jan 22 20:29 gen-word-70-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 697K Jan 22 20:29 gen-word-710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 709K Jan 22 20:29 gen-word-715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 717K Jan 22 20:29 gen-word-720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 707K Jan 22 20:29 gen-word-725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 720K Jan 22 20:29 gen-word-730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 734K Jan 22 20:29 gen-word-735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 727K Jan 22 20:29 gen-word-740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 735K Jan 22 20:29 gen-word-745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 737K Jan 22 20:29 gen-word-750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 748K Jan 22 20:29 gen-word-755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 411K Jan 22 20:29 gen-word-75-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 747K Jan 22 20:29 gen-word-760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 745K Jan 22 20:29 gen-word-765-count.jsonl\n", - "-rw-rw-r-- 1 
recursal recursal 752K Jan 22 20:29 gen-word-770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 754K Jan 22 20:29 gen-word-775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 771K Jan 22 20:29 gen-word-780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 765K Jan 22 20:29 gen-word-785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 776K Jan 22 20:29 gen-word-790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 782K Jan 22 20:29 gen-word-795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 792K Jan 22 20:29 gen-word-800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 796K Jan 22 20:29 gen-word-805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 441K Jan 22 20:29 gen-word-80-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 801K Jan 22 20:29 gen-word-810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 805K Jan 22 20:29 gen-word-815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 820K Jan 22 20:29 gen-word-820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 814K Jan 22 20:29 gen-word-825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 811K Jan 22 20:29 gen-word-830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 820K Jan 22 20:29 gen-word-835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 826K Jan 22 20:29 gen-word-840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 838K Jan 22 20:29 gen-word-845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 836K Jan 22 20:29 gen-word-850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 840K Jan 22 20:29 gen-word-855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 462K Jan 22 20:29 gen-word-85-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 855K Jan 22 20:29 gen-word-860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 858K Jan 22 20:29 gen-word-865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 854K Jan 22 20:29 gen-word-870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 864K Jan 22 20:29 gen-word-875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 867K Jan 22 20:29 gen-word-880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 865K Jan 22 20:29 gen-word-885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 866K Jan 22 20:29 gen-word-890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 877K Jan 22 20:29 gen-word-895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 883K Jan 22 20:29 gen-word-900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 896K Jan 22 20:29 gen-word-905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 489K Jan 22 20:29 gen-word-90-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 892K Jan 22 20:29 gen-word-910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 897K Jan 22 20:29 gen-word-915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 907K Jan 22 20:29 gen-word-920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 907K Jan 22 20:29 gen-word-925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 912K Jan 22 20:29 gen-word-930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 927K Jan 22 20:29 gen-word-935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 936K Jan 22 20:29 gen-word-940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 923K Jan 22 20:29 gen-word-945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 944K Jan 22 20:29 gen-word-950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 933K Jan 22 20:29 gen-word-955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 509K Jan 22 20:29 gen-word-95-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 944K Jan 22 20:29 gen-word-960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 947K Jan 22 20:29 gen-word-965-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 955K Jan 22 20:29 gen-word-970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 964K Jan 22 20:29 gen-word-975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 967K Jan 22 20:29 gen-word-980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 966K Jan 22 20:29 gen-word-985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 966K Jan 22 20:29 gen-word-990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 974K Jan 22 20:29 gen-word-995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 20:29 shuffle-word-10-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 
shuffle-word-1155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 
shuffle-word-1340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-1500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 20:29 shuffle-word-15-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-170-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 20:29 shuffle-word-20-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 20:29 shuffle-word-25-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 20:29 shuffle-word-300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 20:29 shuffle-word-30-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 20:29 shuffle-word-35-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 
shuffle-word-360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 20:29 shuffle-word-40-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 20:29 shuffle-word-45-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 20:29 shuffle-word-50-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M 
Jan 22 20:29 shuffle-word-555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 20:29 shuffle-word-55-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 8.0M Jan 22 20:29 shuffle-word-5-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 20:29 shuffle-word-60-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 20:29 shuffle-word-65-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 20:29 shuffle-word-70-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-735-count.jsonl\n", - "-rw-rw-r-- 1 recursal 
recursal 2.6M Jan 22 20:29 shuffle-word-740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 20:29 shuffle-word-75-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 20:29 shuffle-word-80-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-85-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-90-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-925-count.jsonl\n", - "-rw-rw-r-- 
1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 20:29 shuffle-word-95-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 20:29 shuffle-word-995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 141K Jan 22 20:29 word-2-count.jsonl\n" - ] - } - ], + "execution_count": null, + "id": "75bb2cc2", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:31.185716Z", + "iopub.status.busy": "2024-01-23T13:12:31.185310Z", + "iopub.status.idle": "2024-01-23T13:12:34.675107Z", + "shell.execute_reply": "2024-01-23T13:12:34.673742Z" + }, + "papermill": { + "duration": 3.524274, + "end_time": "2024-01-23T13:12:34.704500", + "exception": false, + "start_time": "2024-01-23T13:12:31.180226", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], "source": [ "%%script bash\n", "\n", @@ -1387,21 +244,21 @@ "echo \"## Generating word repetition dataset ##\"\n", "\n", "#\n", - "# Training set for < 50 words\n", + "# Training set for < 100 words\n", "# This is used to fill in as many blanks as possible\n", "#\n", - "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 300 &\n", - "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 4 1000 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", "for i in {5..100..5} \n", "do\n", " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 150 & \n", " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", "done\n", "\n", "#\n", "# Ramping up the 100+ - 200 words dataset\n", "# \n", - "for i in {105..200..5} \n", + "for i in {110..200..10} \n", "do\n", " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 125 & \n", " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", "done\n", "\n", "#\n", "# Ramping up the 200+ words dataset\n", "# \n", - "for i in {205..1500..5} \n", + "for i in {210..4000..10} \n", "do\n", " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n",
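All of the generator invocations above share one CLI shape: an output JSONL path, a word count, and a sample count. Below is a minimal sketch of what such a word-repetition generator could look like. It is an illustration only, not the actual `./memory_script/gen_limited_prompt_completion_jsonl.py`; the real script's prompt template, vocabulary, and the "(100 token repeat)" handling visible in the generation logs above may differ.

```python
# Hypothetical sketch of a word-repetition dataset generator.
# Only the CLI shape (output path, word count, sample count) is taken from
# the notebook cell above; everything else here is an assumption.
import json
import random
import sys

# Stand-in vocabulary; the real generator presumably samples a larger word list.
WORDS = ["apple", "river", "stone", "cloud", "amber", "forest", "signal", "orbit"]

def make_sample(word_count: int) -> dict:
    # One prompt/completion pair: the model must repeat the word sequence verbatim.
    text = " ".join(random.choices(WORDS, k=word_count))
    return {
        "prompt": f"Repeat the following text exactly:\n{text}\n",
        "completion": text,
    }

if __name__ == "__main__":
    out_path = sys.argv[1]
    word_count = int(sys.argv[2])
    sample_count = int(sys.argv[3])
    with open(out_path, "w") as f:
        for _ in range(sample_count):
            f.write(json.dumps(make_sample(word_count)) + "\n")
    print(f"Generated a single JSONL file with {sample_count} samples"
          f" - {word_count} max words - at {out_path}")
```

The `shuffle_limited_prompt_completion_jsonl.py` calls follow the same argument pattern and produce the matching `shuffle-word-*` files.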
@@ -1424,25 +281,32 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Resolving data files: 100%|███████████████| 601/601 [00:00<00:00, 370647.95it/s]\n", - "Filter (num_proc=160): 100%|██| 372801/372801 [00:03<00:00, 93704.48 examples/s]\n", - "Map (num_proc=160): 100%|████| 363015/363015 [00:02<00:00, 127526.30 examples/s]\n", - "Map (num_proc=160): 100%|█████| 363015/363015 [00:07<00:00, 46066.68 examples/s]\n", - "Map (num_proc=160): 100%|███████| 87900/87900 [00:03<00:00, 27106.01 examples/s]\n", - "Saving the dataset (2/2 shards): 100%|█| 87900/87900 [00:01<00:00, 82134.79 exam\n", - "Saving the dataset (1/1 shards): 100%|█| 364/364 [00:00<00:00, 13312.35 examples\n" - ] - } - ], + "execution_count": null, + "id": "db71b4d3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:34.941925Z", + "iopub.status.busy": "2024-01-23T13:12:34.941597Z", + "iopub.status.idle": "2024-01-23T13:15:24.022937Z", + "shell.execute_reply": "2024-01-23T13:15:24.021071Z" + }, + "papermill": { + "duration": 169.166276, + "end_time": "2024-01-23T13:15:24.026023", + "exception": false, + "start_time": "2024-01-23T13:12:34.859747", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], "source": [ "# Let's pre-tokenize the required dataset\n", + "# and pack the data into 8k token lengths\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# than to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n", + "#\n", "!cd \"{TRAINER_DIR}\" && \\\n", " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-1-tune.yaml\"\n", "\n", @@ -1452,357 +316,42 @@ }, { "cell_type": "markdown", - "metadata": {}, + "id": "8fa689d3", + "metadata": { + "papermill": { + "duration": 0.113732, + "end_time": "2024-01-23T13:15:24.262597", + "exception": false, + "start_time": "2024-01-23T13:15:24.148865", + "status": "completed" + }, + "tags": [] + }, "source": [ - "## Finetune 1 (0 -> 2k) : The actual tune!" + "## Finetune 1 (0 -> 4k) : The actual tune!" ] }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2024-01-22 20:30:14,781] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line.
To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-1-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8x4090] RWKV-v5-1B5-World - Mem-Finetune-1 (bs=256, train-ctx=2048, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=2048'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-1-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8x4090] RWKV-v5-1B5-World - Mem-Finetune-1 (bs=256, train-ctx=2048, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=2048'].\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 2435230032\n", - "Seed set to 2435230032\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. 
Please set your precision to bf16-mixed instead!\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n", - "\n", - "\n", - "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", - " - target_batch_size: 256\n", - " - num_nodes: 1\n", - " - num_devices: 8\n", - " - microbatch_size: 8\n", - " - accumulate_grad_batches: 4\n", - " - effective_batch_size: 256\n", - "\n", - "[rank: 0] Seed set to 2435230032\n", - "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", - "[2024-01-22 20:30:34,516] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-22 20:30:34,555] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-22 20:30:34,573] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-22 20:30:34,643] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-22 20:30:34,665] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-22 20:30:34,667] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-22 20:30:34,779] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[rank: 6] Seed set to 2435230032\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "[rank: 3] Seed set to 2435230032\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "[rank: 4] Seed set to 2435230032\n", - "[rank: 7] Seed set to 2435230032\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "[rank: 5] Seed set to 2435230032\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "[rank: 2] Seed set to 2435230032\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "[rank: 1] Seed set to 2435230032\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "ninja: no work to do.\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "[rank: 1] Seed set to 2435230032\n", - "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", - "[rank: 2] Seed set to 2435230032\n", - "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", - "[rank: 3] Seed set to 2435230032\n", - "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", - "[rank: 6] Seed set to 2435230032\n", - "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", - "[rank: 5] Seed set to 2435230032\n", - "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", - "[rank: 7] Seed set to 2435230032\n", - "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", - "[rank: 4] Seed set to 2435230032\n", - "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", - "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240122_203109-hhwmn520\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8x4090] RWKV-v5-1B5-World - Mem-Finetune-1 (bs=256, train-ctx=2048, deepspeed_stage_1)\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/hhwmn520\u001b[0m\n", - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "#\n", - "# RWKV lighting_trainer.py important notes \n", - "# https://github.com/RWKV/RWKV-infctx-trainer \n", - "#\n", - "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", - "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", - "# - When resuming from checkpoint, the estimated time is inaccurate\n", - "#LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "\n", - "\n", - "[RWKV.model] Configuring optimizer with\n", - " - lr_init: 8.000e-04 (0.0008)\n", - " - lr_final: 4.000e-04 (0.0004)\n", - "\n", - "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", - "Building extension module fused_adam...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.07379484176635742 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Loading `train_dataloader` to estimate number of stepping batches.\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.1038506031036377 seconds\n", - "Time to load fused_adam op: 0.10417509078979492 seconds\n", - "Time to load fused_adam op: 0.10447382926940918 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10385513305664062 seconds\n", - "Time to load fused_adam op: 0.10325026512145996 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10282731056213379 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.10386180877685547 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", - " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "\n", - " | Name | Type | Params\n", - "--------------------------------------\n", - "0 | emb | Embedding | 134 M \n", - "1 | blocks | ModuleList | 1.3 B \n", - "2 | ln_out | LayerNorm | 4.1 K \n", - "3 | head | Linear | 134 M \n", - "--------------------------------------\n", - "1.6 B Trainable params\n", - "0 Non-trainable params\n", - "1.6 B Total params\n", - "6,311.018 Total estimated model params size (MB)\n", - "Epoch 0: 7%| | 100/1374 [04:50<1:01:41, 0.34it/s, v_num=n520, train/loss=0.05/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", - " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", - " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", - " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", - " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", - " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", - " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", - " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", - " warnings.warn(\n", - "Epoch 0: 100%|█| 1374/1374 [1:01:19<00:00, 0.37it/s, v_num=n520, train/loss=0.1\n", - "Validation: | | 0/? [00:00 2*2k) : The actual tune!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b7f8a18", + "metadata": {}, + "outputs": [], + "source": [ + "# Start the finetune model training\n", "!cd \"{TRAINER_DIR}\" && \\\n", - " python3 dragon_test.py \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\" \"cuda fp32\"" + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-1-tune.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-1 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=4 \\\n", + " --model.ctx_len=8192" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, + "id": "f35d6f67", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SCRIPT_DIR: /home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/memory_script\n", - "PROJECT_DIR: /home/recursal/RWKV-infctx-trainer\n", - "MODEL_CODE_DIR: /home/recursal/RWKV-infctx-trainer/RWKV-v5\n", - "[2024-01-22 22:55:37,758] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "/home/recursal/RWKV-infctx-trainer/RWKV-v5/src/model.py:1390: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", - " batch_tokens = torch.tensor(\n", - "###\n", - "### Model validation start ###\n", - "###\n", - "## Model validation for 5 tokens : 60.0% similarity, with 3 matched token, and 2 token mismatch\n", - "## Model validation for 10 tokens : 80.0% similarity, with 8 matched token, and 2 token mismatch\n", - "## Model validation for 15 tokens : 86.66666666666667% similarity, with 13 matched token, and 2 token mismatch\n", - "## Model validation for 20 tokens : 90.0% similarity, with 18 matched token, and 2 token mismatch\n", - "## Model validation for 25 tokens : 92.0% similarity, with 23 matched token, and 2 token mismatch\n", - "## Model validation for 30 tokens : 93.33333333333333% similarity, with 28 matched token, and 2 token mismatch\n", - "## Model validation for 35 tokens : 94.28571428571428% similarity, with 33 matched token, and 2 token mismatch\n", - "## Model validation for 40 tokens : 95.0% similarity, with 38 matched token, and 2 token mismatch\n", - "## Model validation for 45 tokens : 95.55555555555556% similarity, with 43 matched token, and 2 token mismatch\n", - "## Model validation for 50 tokens : 96.0% similarity, with 48 matched token, and 2 token mismatch\n", - "## Model validation for 55 tokens : 96.36363636363636% similarity, with 53 matched token, and 2 token mismatch\n", - "## Model validation for 60 tokens : 96.66666666666667% similarity, with 58 matched token, and 2 token mismatch\n", - "## Model validation for 65 tokens : 96.92307692307692% similarity, with 63 matched token, and 2 token mismatch\n", - "## Model validation for 70 tokens : 97.14285714285714% similarity, with 68 matched token, and 2 token mismatch\n", - "## Model validation for 75 tokens : 97.33333333333334% similarity, with 73 matched token, and 2 token mismatch\n", - "## Model validation for 80 tokens : 97.5% similarity, with 78 matched token, and 2 token mismatch\n", - "## Model validation for 85 tokens : 97.6470588235294% similarity, with 83 matched token, and 2 token mismatch\n", - "## Model validation for 90 tokens : 97.77777777777777% similarity, with 88 matched token, and 2 token mismatch\n", - "## Model validation for 95 tokens : 97.89473684210527% similarity, with 93 matched token, and 2 token mismatch\n", - "## Model validation for 100 tokens : 98.0% similarity, with 98 matched token, and 2 token mismatch\n", - "## Model validation for 105 tokens : 98.09523809523809% similarity, with 103 matched token, and 2 token mismatch\n", - "## Model validation for 110 tokens : 98.18181818181819% similarity, with 108 matched token, and 2 token mismatch\n", - "## Model validation for 115 tokens : 98.26086956521739% similarity, with 113 matched token, and 2 token mismatch\n", - "## Model validation for 120 tokens : 98.33333333333333% similarity, with 118 matched token, and 2 token mismatch\n", - "## Model validation for 125 tokens : 98.4% similarity, with 123 matched token, and 2 token mismatch\n", - "## Model validation for 130 tokens : 98.46153846153847% similarity, with 128 matched token, and 2 token mismatch\n", - "## Model validation for 
135 tokens : 98.51851851851852% similarity, with 133 matched token, and 2 token mismatch\n", - "## Model validation for 140 tokens : 98.57142857142858% similarity, with 138 matched token, and 2 token mismatch\n", - "## Model validation for 145 tokens : 98.62068965517241% similarity, with 143 matched token, and 2 token mismatch\n", - "## Model validation for 150 tokens : 98.66666666666667% similarity, with 148 matched token, and 2 token mismatch\n", - "## Model validation for 160 tokens : 98.75% similarity, with 158 matched token, and 2 token mismatch\n", - "## Model validation for 170 tokens : 98.82352941176471% similarity, with 168 matched token, and 2 token mismatch\n", - "## Model validation for 180 tokens : 98.88888888888889% similarity, with 178 matched token, and 2 token mismatch\n", - "## Model validation for 190 tokens : 98.94736842105263% similarity, with 188 matched token, and 2 token mismatch\n", - "## Model validation for 200 tokens : 99.0% similarity, with 198 matched token, and 2 token mismatch\n", - "## Model validation for 210 tokens : 99.04761904761905% similarity, with 208 matched token, and 2 token mismatch\n", - "## Model validation for 220 tokens : 98.63636363636363% similarity, with 217 matched token, and 3 token mismatch\n", - "## Model validation for 230 tokens : 98.69565217391305% similarity, with 227 matched token, and 3 token mismatch\n", - "## Model validation for 240 tokens : 98.33333333333333% similarity, with 236 matched token, and 4 token mismatch\n", - "## Model validation for 250 tokens : 98.4% similarity, with 246 matched token, and 4 token mismatch\n", - "## Model validation for 260 tokens : 98.84615384615385% similarity, with 257 matched token, and 3 token mismatch\n", - "## Model validation for 270 tokens : 97.4074074074074% similarity, with 263 matched token, and 7 token mismatch\n", - "## Model validation for 280 tokens : 98.57142857142858% similarity, with 276 matched token, and 4 token mismatch\n", - "## Model validation for 290 tokens : 97.93103448275862% similarity, with 284 matched token, and 6 token mismatch\n", - "## Model validation for 300 tokens : 98.33333333333333% similarity, with 295 matched token, and 5 token mismatch\n", - "## Model validation for 325 tokens : 97.84615384615385% similarity, with 318 matched token, and 7 token mismatch\n", - "## Model validation for 350 tokens : 97.71428571428571% similarity, with 342 matched token, and 8 token mismatch\n", - "## Model validation for 375 tokens : 98.4% similarity, with 369 matched token, and 6 token mismatch\n", - "## Model validation for 400 tokens : 98.0% similarity, with 392 matched token, and 8 token mismatch\n", - "## Model validation for 425 tokens : 98.11764705882354% similarity, with 417 matched token, and 8 token mismatch\n", - "## Model validation for 450 tokens : 97.11111111111111% similarity, with 437 matched token, and 13 token mismatch\n", - "## Model validation for 475 tokens : 97.05263157894737% similarity, with 461 matched token, and 14 token mismatch\n", - "## Model validation for 500 tokens : 96.8% similarity, with 484 matched token, and 16 token mismatch\n", - "## Model validation for 525 tokens : 96.19047619047619% similarity, with 505 matched token, and 20 token mismatch\n", - "## Model validation for 550 tokens : 95.81818181818181% similarity, with 527 matched token, and 23 token mismatch\n", - "## Model validation for 575 tokens : 96.34782608695652% similarity, with 554 matched token, and 21 token mismatch\n", - "## Model validation for 600 tokens : 96.0% 
similarity, with 576 matched token, and 24 token mismatch\n", - "## Model validation for 625 tokens : 94.88% similarity, with 593 matched token, and 32 token mismatch\n", - "## Model validation for 650 tokens : 94.3076923076923% similarity, with 613 matched token, and 37 token mismatch\n", - "## Model validation for 675 tokens : 93.62962962962963% similarity, with 632 matched token, and 43 token mismatch\n", - "## Model validation for 700 tokens : 92.57142857142857% similarity, with 648 matched token, and 52 token mismatch\n", - "## Model validation for 750 tokens : 91.33333333333333% similarity, with 685 matched token, and 65 token mismatch\n", - "## Model validation for 800 tokens : 89.75% similarity, with 718 matched token, and 82 token mismatch\n", - "## Model validation for 850 tokens : 88.47058823529412% similarity, with 752 matched token, and 98 token mismatch\n", - "## Model validation for 900 tokens : 87.44444444444444% similarity, with 787 matched token, and 113 token mismatch\n", - "## Model validation for 950 tokens : 85.26315789473684% similarity, with 810 matched token, and 140 token mismatch\n", - "## Model validation for 1000 tokens : 84.6% similarity, with 846 matched token, and 154 token mismatch\n", - "###\n", - "### Model validation end ###\n", - "###\n", - "SCRIPT_DIR: /home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/memory_script\n", - "PROJECT_DIR: /home/recursal/RWKV-infctx-trainer\n", - "MODEL_CODE_DIR: /home/recursal/RWKV-infctx-trainer/RWKV-v5\n", - "[2024-01-22 22:56:41,762] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", - " return self.fget.__get__(instance, owner)()\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "/home/recursal/RWKV-infctx-trainer/RWKV-v5/src/model.py:1390: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", - " batch_tokens = torch.tensor(\n", - "###\n", - "### Model validation start ###\n", - "###\n", - "## Model validation for 1000 tokens : 84.6% similarity, with 846 matched token, and 154 token mismatch\n", - "## Model validation for 1050 tokens : 81.61904761904762% similarity, with 857 matched token, and 193 token mismatch\n", - "## Model validation for 1100 tokens : 80.0% similarity, with 880 matched token, and 220 token mismatch\n", - "## Model validation for 1150 tokens : 76.52173913043478% similarity, with 880 matched token, and 270 token mismatch\n", - "## Model validation for 1200 tokens : 74.41666666666666% similarity, with 893 matched token, and 307 token mismatch\n", - "## Model validation for 1250 tokens : 73.04% similarity, with 913 matched token, and 337 token mismatch\n", - "## Model validation for 1300 tokens : 70.15384615384616% similarity, with 912 matched token, and 388 token mismatch\n", - "## Model validation for 1350 tokens : 67.62962962962963% similarity, with 913 matched token, and 437 token mismatch\n", - "## Model validation for 1400 tokens : 65.64285714285715% similarity, with 919 matched token, and 481 token mismatch\n", - "## Model validation for 1450 tokens : 62.96551724137931% similarity, with 913 matched token, and 537 token mismatch\n", - "## Model validation for 1500 tokens : 59.8% similarity, with 897 matched token, and 603 token mismatch\n", - "## Model validation for 1550 tokens : 58.774193548387096% similarity, with 911 matched token, and 639 token mismatch\n", - "## Model validation for 1600 tokens : 56.8125% similarity, with 909 matched token, and 691 token mismatch\n", - "## Model validation for 1650 tokens : 53.81818181818182% similarity, with 888 matched token, and 762 token mismatch\n", - "## Model validation for 1700 tokens : 52.294117647058826% similarity, with 889 matched token, and 811 token mismatch\n", - "## Model validation for 1750 tokens : 49.42857142857143% similarity, with 865 matched token, and 885 token mismatch\n", - "## Model validation for 1800 tokens : 46.833333333333336% similarity, with 843 matched token, and 957 token mismatch\n", - "## Model validation for 1850 tokens : 44.81081081081081% similarity, with 829 matched token, and 1021 token mismatch\n", - "## Model validation for 1900 tokens : 43.57894736842105% similarity, with 828 matched token, and 1072 token mismatch\n", - "## Model validation for 1950 tokens : 40.35897435897436% similarity, with 787 matched token, and 1163 token mismatch\n", - "## Model validation for 2000 tokens : 39.2% similarity, with 784 matched token, and 1216 token mismatch\n", - "## Model validation for 2050 tokens : 37.26829268292683% similarity, with 764 matched token, and 1286 token mismatch\n", - "## Model validation for 2100 tokens : 35.76190476190476% similarity, with 751 matched token, and 1349 token mismatch\n", - "## Model validation for 2150 tokens : 34.27906976744186% similarity, with 737 matched token, and 1413 token mismatch\n", - "## Model validation for 2200 tokens : 32.95454545454545% similarity, with 725 matched token, 
and 1475 token mismatch\n", - "## Model validation for 2250 tokens : 32.22222222222222% similarity, with 725 matched token, and 1525 token mismatch\n", - "## Model validation for 2300 tokens : 30.043478260869566% similarity, with 691 matched token, and 1609 token mismatch\n", - "## Model validation for 2350 tokens : 29.06382978723404% similarity, with 683 matched token, and 1667 token mismatch\n", - "## Model validation for 2400 tokens : 27.375% similarity, with 657 matched token, and 1743 token mismatch\n", - "## Model validation for 2450 tokens : 26.040816326530614% similarity, with 638 matched token, and 1812 token mismatch\n", - "## Model validation for 2500 tokens : 24.12% similarity, with 603 matched token, and 1897 token mismatch\n", - "## Model validation for 2550 tokens : 22.980392156862745% similarity, with 586 matched token, and 1964 token mismatch\n", - "## Model validation for 2600 tokens : 22.692307692307693% similarity, with 590 matched token, and 2010 token mismatch\n", - "## Model validation for 2650 tokens : 21.132075471698116% similarity, with 560 matched token, and 2090 token mismatch\n", - "## Model validation for 2700 tokens : 20.14814814814815% similarity, with 544 matched token, and 2156 token mismatch\n", - "## Model validation for 2750 tokens : 18.654545454545453% similarity, with 513 matched token, and 2237 token mismatch\n", - "## Model validation for 2800 tokens : 17.892857142857142% similarity, with 501 matched token, and 2299 token mismatch\n", - "## Model validation for 2850 tokens : 17.192982456140353% similarity, with 490 matched token, and 2360 token mismatch\n", - "## Model validation for 2900 tokens : 16.10344827586207% similarity, with 467 matched token, and 2433 token mismatch\n", - "## Model validation for 2950 tokens : 15.050847457627118% similarity, with 444 matched token, and 2506 token mismatch\n", - "## Model validation for 3000 tokens : 13.900000000000002% similarity, with 417 matched token, and 2583 token mismatch\n", - "###\n", - "### Model validation end ###\n", - "###\n" - ] - } - ], + "outputs": [], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db5eef37", + "metadata": {}, + "outputs": [], "source": [ "# Lets do a memory eval!\n", "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", @@ -2064,14 +482,18 @@ }, { "cell_type": "markdown", + "id": "6fe5d71b", "metadata": {}, "source": [ - "## Finetune 2 (2k -> 4k) - More data" + "## Finetune 2 (0 -> 2*4k) : Dataset preperation\n", + "\n", + "Stage 2, handles total context size of 8k. Meaning it will be tuned for memory task of approximately 4k tokens of size." 
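The stage-1 command above ran with `--trainer.microbatch_size=8` and `--model.ctx_len=2048`, while the stage-2 plan halves the microbatch and quadruples the context to `--trainer.microbatch_size=4` / `--model.ctx_len=8192`. The trainer log earlier in this hunk shows how `target_batch_size: 256` was resolved into `accumulate_grad_batches: 4` on 8 GPUs; a minimal sketch of that arithmetic follows (the helper name is ours for illustration, not the trainer's API):

```python
# Sketch of the batch-size bookkeeping printed by the trainer log above.
# `resolve_grad_accumulation` is an illustrative helper, not the trainer's API.
def resolve_grad_accumulation(target_batch_size: int, num_nodes: int,
                              num_devices: int, microbatch_size: int) -> int:
    world = num_nodes * num_devices
    per_step = world * microbatch_size  # sequences per forward/backward pass
    assert target_batch_size % per_step == 0, "target batch must divide evenly"
    return target_batch_size // per_step

# Stage-1 values from the log: 256 / (1 * 8 * 8) = 4 accumulation steps.
print(resolve_grad_accumulation(256, num_nodes=1, num_devices=8, microbatch_size=8))  # 4

# Tokens per optimizer step per GPU, stage 1 vs stage 2.
print(8 * 2048)  # 16384 tokens at ctx_len=2048
print(4 * 8192)  # 32768 tokens at ctx_len=8192
```

On the same 8 GPUs, stage 2's microbatch of 4 would resolve to 256 / (8 × 4) = 8 accumulation steps, with each optimizer step covering twice as many tokens per GPU (32768 vs 16384).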
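Looking back at the memory-eval output above, every `## Model validation for N tokens` row is consistent with a simple token-level accuracy: similarity = matched / N, with matched + mismatch = N (e.g. 846 / 1000 = 84.6%), and the sharp drop past roughly 2000 tokens lines up with the stage-1 training context of 2048. A small sketch that checks one reported row (the parsing is ours; the eval script's internals aren't shown in this hunk):

```python
import re

# One row from the eval output above; every row satisfies
# matched + mismatch == N and similarity == matched / N.
line = ("## Model validation for 1000 tokens : 84.6% similarity, "
        "with 846 matched token, and 154 token mismatch")

m = re.search(r"for (\d+) tokens : ([\d.]+)% similarity, "
              r"with (\d+) matched token, and (\d+) token mismatch", line)
total, reported = int(m.group(1)), float(m.group(2))
matched, mismatch = int(m.group(3)), int(m.group(4))

assert matched + mismatch == total
assert abs(matched * 100 / total - reported) < 1e-6  # 846 / 1000 -> 84.6%
```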
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, + "id": "0f54c3e1", "metadata": {}, "outputs": [ { @@ -2079,4810 +501,1736 @@ "output_type": "stream", "text": [ "## Generating word reptition dataset ##\n", - "Generated JSONL file with - 5 max words, 500 samples - at ./dataset/gen-word-5-count.jsonl\n", - "Generated JSONL file with - 20 max words, 500 samples - at ./dataset/gen-word-20-count.jsonl\n", - "Generated JSONL file with - 35 max words, 500 samples - at ./dataset/gen-word-35-count.jsonl\n", - "Generated JSONL file with - 190 max words, 50 samples - at ./dataset/gen-word-190-count.jsonl\n", - "Generated JSONL file with - 60 max words, 500 samples - at ./dataset/gen-word-60-count.jsonl\n", - "Generated JSONL file with - 205 max words, 50 samples - at ./dataset/gen-word-205-count.jsonl\n", - "Generated JSONL file with - 10 max words, 500 samples - at ./dataset/gen-word-10-count.jsonl\n", - "Generated JSONL file with - 200 max words, 50 samples - at ./dataset/gen-word-200-count.jsonl\n", - "Generated JSONL file with - 230 max words, 50 samples - at ./dataset/gen-word-230-count.jsonl\n", - "Generated JSONL file with - 15 max words, 500 samples - at ./dataset/gen-word-15-count.jsonl\n", - "Generated JSONL file with - 255 max words, 50 samples - at ./dataset/gen-word-255-count.jsonl\n", - "Generated JSONL file with - 110 max words, 50 samples - at ./dataset/gen-word-110-count.jsonl\n", - "Generated JSONL file with - 235 max words, 50 samples - at ./dataset/gen-word-235-count.jsonl\n", - "Generated JSONL file with - 95 max words, 500 samples - at ./dataset/gen-word-95-count.jsonl\n", - "Generated JSONL file with - 90 max words, 500 samples - at ./dataset/gen-word-90-count.jsonl\n", - "Generated JSONL file with - 135 max words, 50 samples - at ./dataset/gen-word-135-count.jsonl\n", - "Generated JSONL file with - 120 max words, 50 samples - at ./dataset/gen-word-120-count.jsonl\n", - "Generated JSONL file with - 260 max words, 50 samples - at ./dataset/gen-word-260-count.jsonl\n", - "Generated JSONL file with - 80 max words, 500 samples - at ./dataset/gen-word-80-count.jsonl\n", - "Generated JSONL file with - 115 max words, 50 samples - at ./dataset/gen-word-115-count.jsonl\n", - "Generated JSONL file with - 25 max words, 500 samples - at ./dataset/gen-word-25-count.jsonl\n", - "Generated JSONL file with - 105 max words, 50 samples - at ./dataset/gen-word-105-count.jsonl\n", - "Generated JSONL file with - 125 max words, 50 samples - at ./dataset/gen-word-125-count.jsonl\n", - "Generated JSONL file with - 140 max words, 50 samples - at ./dataset/gen-word-140-count.jsonl\n", - "Generated JSONL file with - 195 max words, 50 samples - at ./dataset/gen-word-195-count.jsonl\n", - "Generated JSONL file with - 30 max words, 500 samples - at ./dataset/gen-word-30-count.jsonl\n", - "Generated JSONL file with - 130 max words, 50 samples - at ./dataset/gen-word-130-count.jsonl\n", - "Generated JSONL file with - 40 max words, 500 samples - at ./dataset/gen-word-40-count.jsonl\n", - "Generated JSONL file with - 165 max words, 50 samples - at ./dataset/gen-word-165-count.jsonl\n", - "Generated JSONL file with - 145 max words, 50 samples - at ./dataset/gen-word-145-count.jsonl\n", - "Generated JSONL file with - 160 max words, 50 samples - at ./dataset/gen-word-160-count.jsonl\n", - "Generated JSONL file with - 150 max words, 50 samples - at ./dataset/gen-word-150-count.jsonl\n", - "Generated JSONL file with - 50 max words, 500 samples - at ./dataset/gen-word-50-count.jsonl\n", - 
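The stdout above comes from the word-repetition dataset builder, which writes one JSONL file per word-count bucket (`gen-word-*` files with fixed sample counts, plus `shuffle-word-*` variants sized by a 50-token repeat target). The generator script itself isn't part of this hunk; below is a minimal sketch of the same idea, under the assumption of a simple `{"text": ...}` schema and an illustrative prompt template:

```python
import json
import os
import random

# A tiny stand-in word list; the real generator presumably samples a larger vocabulary.
WORDS = ["apple", "river", "stone", "cloud", "violet", "ember", "harbor", "maple"]

def generate_word_jsonl(max_words: int, samples: int, out_path: str) -> None:
    """Write `samples` word-repetition records of up to `max_words` words each.

    The {"text": ...} schema and the prompt wording are assumptions for
    illustration -- this hunk only shows the generator's log output.
    """
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "w") as f:
        for _ in range(samples):
            words = " ".join(random.choices(WORDS, k=max_words))
            record = {"text": f"Input:\n{words}\n\nOutput:\n{words}"}
            f.write(json.dumps(record) + "\n")
    print(f"Generated JSONL file with - {max_words} max words, "
          f"{samples} samples - at {out_path}")

generate_word_jsonl(5, 500, "./dataset/gen-word-5-count.jsonl")
```

The sample counts in the `shuffle-word-*` logs vary per bucket because those files are filled until a token-repeat budget is met rather than to a fixed sample count; that sizing logic is omitted from this sketch.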
"Generated JSONL file with - 185 max words, 50 samples - at ./dataset/gen-word-185-count.jsonl\n", - "Generated JSONL file with - 155 max words, 50 samples - at ./dataset/gen-word-155-count.jsonl\n", - "Generated JSONL file with - 170 max words, 50 samples - at ./dataset/gen-word-170-count.jsonl\n", - "Generated JSONL file with - 175 max words, 50 samples - at ./dataset/gen-word-175-count.jsonl\n", - "Generated JSONL file with - 180 max words, 50 samples - at ./dataset/gen-word-180-count.jsonl\n", - "Generated JSONL file with - 70 max words, 500 samples - at ./dataset/gen-word-70-count.jsonl\n", - "Generated JSONL file with - 370 max words, 50 samples - at ./dataset/gen-word-370-count.jsonl\n", - "Generated JSONL file with - 210 max words, 50 samples - at ./dataset/gen-word-210-count.jsonl\n", - "Generated JSONL file with - 360 max words, 50 samples - at ./dataset/gen-word-360-count.jsonl\n", - "Generated JSONL file with - 45 max words, 500 samples - at ./dataset/gen-word-45-count.jsonl\n", - "Generated JSONL file with - 250 max words, 50 samples - at ./dataset/gen-word-250-count.jsonl\n", - "Generated JSONL file with - 225 max words, 50 samples - at ./dataset/gen-word-225-count.jsonl\n", - "Generated JSONL file with - 220 max words, 50 samples - at ./dataset/gen-word-220-count.jsonl\n", - "Generated JSONL file with - 55 max words, 500 samples - at ./dataset/gen-word-55-count.jsonl\n", - "Generated JSONL file with - 215 max words, 50 samples - at ./dataset/gen-word-215-count.jsonl\n", - "Generated JSONL file with - 275 max words, 50 samples - at ./dataset/gen-word-275-count.jsonl\n", - "Generated JSONL file with - 65 max words, 500 samples - at ./dataset/gen-word-65-count.jsonl\n", - "Generated JSONL file with - 240 max words, 50 samples - at ./dataset/gen-word-240-count.jsonl\n", - "Generated JSONL file with - 245 max words, 50 samples - at ./dataset/gen-word-245-count.jsonl\n", - "Generated JSONL file with - 270 max words, 50 samples - at ./dataset/gen-word-270-count.jsonl\n", - "Generated JSONL file with - 265 max words, 50 samples - at ./dataset/gen-word-265-count.jsonl\n", - "Generated JSONL file with - 75 max words, 500 samples - at ./dataset/gen-word-75-count.jsonl\n", - "Generated JSONL file with - 300 max words, 50 samples - at ./dataset/gen-word-300-count.jsonl\n", - "Generated JSONL file with - 280 max words, 50 samples - at ./dataset/gen-word-280-count.jsonl\n", - "Generated a single JSONL file with 950 samples (50 token repeat) - 115 max words - at ./dataset/shuffle-word-115-count.jsonl\n", - "Generated JSONL file with - 290 max words, 50 samples - at ./dataset/gen-word-290-count.jsonl\n", - "Generated JSONL file with - 310 max words, 50 samples - at ./dataset/gen-word-310-count.jsonl\n", - "Generated JSONL file with - 305 max words, 50 samples - at ./dataset/gen-word-305-count.jsonl\n", - "Generated JSONL file with - 315 max words, 50 samples - at ./dataset/gen-word-315-count.jsonl\n", - "Generated a single JSONL file with 703 samples (50 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", - "Generated JSONL file with - 85 max words, 500 samples - at ./dataset/gen-word-85-count.jsonl\n", - "Generated JSONL file with - 330 max words, 50 samples - at ./dataset/gen-word-330-count.jsonl\n", - "Generated JSONL file with - 100 max words, 500 samples - at ./dataset/gen-word-100-count.jsonl\n", - "Generated JSONL file with - 335 max words, 50 samples - at ./dataset/gen-word-335-count.jsonl\n", - "Generated JSONL file with - 340 max words, 50 samples - at 
./dataset/gen-word-340-count.jsonl\n", - "Generated JSONL file with - 425 max words, 50 samples - at ./dataset/gen-word-425-count.jsonl\n", - "Generated a single JSONL file with 471 samples (50 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", - "Generated a single JSONL file with 459 samples (50 token repeat) - 265 max words - at ./dataset/shuffle-word-265-count.jsonl\n", - "Generated JSONL file with - 345 max words, 50 samples - at ./dataset/gen-word-345-count.jsonl\n", - "Generated a single JSONL file with 466 samples (50 token repeat) - 255 max words - at ./dataset/shuffle-word-255-count.jsonl\n", - "Generated JSONL file with - 350 max words, 50 samples - at ./dataset/gen-word-350-count.jsonl\n", - "Generated a single JSONL file with 1027 samples (50 token repeat) - 105 max words - at ./dataset/shuffle-word-105-count.jsonl\n", - "Generated JSONL file with - 355 max words, 50 samples - at ./dataset/gen-word-355-count.jsonl\n", - "Generated JSONL file with - 490 max words, 50 samples - at ./dataset/gen-word-490-count.jsonl\n", - "Generated JSONL file with - 390 max words, 50 samples - at ./dataset/gen-word-390-count.jsonl\n", - "Generated a single JSONL file with 920 samples (50 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", - "Generated JSONL file with - 430 max words, 50 samples - at ./dataset/gen-word-430-count.jsonl\n", - "Generated JSONL file with - 380 max words, 50 samples - at ./dataset/gen-word-380-count.jsonl\n", - "Generated JSONL file with - 375 max words, 50 samples - at ./dataset/gen-word-375-count.jsonl\n", - "Generated JSONL file with - 440 max words, 50 samples - at ./dataset/gen-word-440-count.jsonl\n", - "Generated JSONL file with - 385 max words, 50 samples - at ./dataset/gen-word-385-count.jsonl\n", - "Generated JSONL file with - 285 max words, 50 samples - at ./dataset/gen-word-285-count.jsonl\n", - "Generated JSONL file with - 435 max words, 50 samples - at ./dataset/gen-word-435-count.jsonl\n", - "Generated JSONL file with - 295 max words, 50 samples - at ./dataset/gen-word-295-count.jsonl\n", - "Generated JSONL file with - 395 max words, 50 samples - at ./dataset/gen-word-395-count.jsonl\n", - "Generated a single JSONL file with 737 samples (50 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", - "Generated JSONL file with - 500 max words, 50 samples - at ./dataset/gen-word-500-count.jsonl\n", - "Generated JSONL file with - 545 max words, 50 samples - at ./dataset/gen-word-545-count.jsonl\n", - "Generated JSONL file with - 580 max words, 50 samples - at ./dataset/gen-word-580-count.jsonl\n", - "Generated JSONL file with - 405 max words, 50 samples - at ./dataset/gen-word-405-count.jsonl\n", - "Generated JSONL file with - 420 max words, 50 samples - at ./dataset/gen-word-420-count.jsonl\n", - "Generated a single JSONL file with 469 samples (50 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonlGenerated JSONL file with - 400 max words, 50 samples - at ./dataset/gen-word-400-count.jsonl\n", - "\n", - "Generated JSONL file with - 320 max words, 50 samples - at ./dataset/gen-word-320-count.jsonl\n", - "Generated JSONL file with - 555 max words, 50 samples - at ./dataset/gen-word-555-count.jsonl\n", - "Generated a single JSONL file with 459 samples (50 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", - "Generated JSONL file with - 540 max words, 50 samples - at ./dataset/gen-word-540-count.jsonl\n", - "Generated JSONL file with - 515 
max words, 50 samples - at ./dataset/gen-word-515-count.jsonl\n", - "Generated JSONL file with - 585 max words, 50 samples - at ./dataset/gen-word-585-count.jsonl\n", - "Generated JSONL file with - 475 max words, 50 samples - at ./dataset/gen-word-475-count.jsonl\n", - "Generated JSONL file with - 645 max words, 50 samples - at ./dataset/gen-word-645-count.jsonl\n", - "Generated a single JSONL file with 842 samples (50 token repeat) - 135 max words - at ./dataset/shuffle-word-135-count.jsonl\n", - "Generated JSONL file with - 530 max words, 50 samples - at ./dataset/gen-word-530-count.jsonl\n", - "Generated JSONL file with - 525 max words, 50 samples - at ./dataset/gen-word-525-count.jsonl\n", - "Generated JSONL file with - 510 max words, 50 samples - at ./dataset/gen-word-510-count.jsonl\n", - "Generated JSONL file with - 410 max words, 50 samples - at ./dataset/gen-word-410-count.jsonl\n", - "Generated a single JSONL file with 295 samples (50 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", - "Generated a single JSONL file with 459 samples (50 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", - "Generated JSONL file with - 325 max words, 50 samples - at ./dataset/gen-word-325-count.jsonl\n", - "Generated JSONL file with - 570 max words, 50 samples - at ./dataset/gen-word-570-count.jsonl\n", - "Generated JSONL file with - 655 max words, 50 samples - at ./dataset/gen-word-655-count.jsonl\n", - "Generated JSONL file with - 535 max words, 50 samples - at ./dataset/gen-word-535-count.jsonl\n", - "Generated JSONL file with - 445 max words, 50 samples - at ./dataset/gen-word-445-count.jsonl\n", - "Generated a single JSONL file with 364 samples (50 token repeat) - 305 max words - at ./dataset/shuffle-word-305-count.jsonl\n", - "Generated JSONL file with - 610 max words, 50 samples - at ./dataset/gen-word-610-count.jsonl\n", - "Generated JSONL file with - 520 max words, 50 samples - at ./dataset/gen-word-520-count.jsonl\n", - "Generated JSONL file with - 365 max words, 50 samples - at ./dataset/gen-word-365-count.jsonl\n", - "Generated a single JSONL file with 350 samples (50 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", - "Generated a single JSONL file with 351 samples (50 token repeat) - 345 max words - at ./dataset/shuffle-word-345-count.jsonl\n", - "Generated a single JSONL file with 988 samples (50 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", - "Generated a single JSONL file with 791 samples (50 token repeat) - 145 max words - at ./dataset/shuffle-word-145-count.jsonl\n", - "Generated JSONL file with - 605 max words, 50 samples - at ./dataset/gen-word-605-count.jsonl\n", - "Generated a single JSONL file with 299 samples (50 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", - "Generated a single JSONL file with 349 samples (50 token repeat) - 385 max words - at ./dataset/shuffle-word-385-count.jsonl\n", - "Generated a single JSONL file with 772 samples (50 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", - "Generated a single JSONL file with 809 samples (50 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonlGenerated a single JSONL file with 702 samples (50 token repeat) - 175 max words - at ./dataset/shuffle-word-175-count.jsonl\n", - "\n", - "Generated a single JSONL file with 862 samples (50 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", - "Generated a single 
JSONL file with 693 samples (50 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", - "Generated a single JSONL file with 894 samples (50 token repeat) - 125 max words - at ./dataset/shuffle-word-125-count.jsonl\n", - "Generated JSONL file with - 650 max words, 50 samples - at ./dataset/gen-word-650-count.jsonl\n", - "Generated a single JSONL file with 544 samples (50 token repeat) - 205 max words - at ./dataset/shuffle-word-205-count.jsonl\n", - "Generated JSONL file with - 600 max words, 50 samples - at ./dataset/gen-word-600-count.jsonl\n", - "Generated a single JSONL file with 507 samples (50 token repeat) - 215 max words - at ./dataset/shuffle-word-215-count.jsonl\n", - "Generated JSONL file with - 615 max words, 50 samples - at ./dataset/gen-word-615-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 535 max words - at ./dataset/shuffle-word-535-count.jsonl\n", - "Generated a single JSONL file with 681 samples (50 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", - "Generated a single JSONL file with 720 samples (50 token repeat) - 165 max words - at ./dataset/shuffle-word-165-count.jsonl\n", - "Generated a single JSONL file with 504 samples (50 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", - "Generated a single JSONL file with 499 samples (50 token repeat) - 225 max words - at ./dataset/shuffle-word-225-count.jsonl\n", - "Generated JSONL file with - 495 max words, 50 samples - at ./dataset/gen-word-495-count.jsonl\n", - "Generated a single JSONL file with 479 samples (50 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", - "Generated a single JSONL file with 680 samples (50 token repeat) - 195 max words - at ./dataset/shuffle-word-195-count.jsonl\n", - "Generated JSONL file with - 630 max words, 50 samples - at ./dataset/gen-word-630-count.jsonl\n", - "Generated a single JSONL file with 521 samples (50 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", - "Generated a single JSONL file with 494 samples (50 token repeat) - 235 max words - at ./dataset/shuffle-word-235-count.jsonl\n", - "Generated JSONL file with - 620 max words, 50 samples - at ./dataset/gen-word-620-count.jsonl\n", - "Generated a single JSONL file with 685 samples (50 token repeat) - 185 max words - at ./dataset/shuffle-word-185-count.jsonl\n", - "Generated a single JSONL file with 499 samples (50 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", - "Generated JSONL file with - 640 max words, 50 samples - at ./dataset/gen-word-640-count.jsonl\n", - "Generated a single JSONL file with 461 samples (50 token repeat) - 275 max words - at ./dataset/shuffle-word-275-count.jsonl\n", - "Generated a single JSONL file with 666 samples (50 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", - "Generated a single JSONL file with 473 samples (50 token repeat) - 245 max words - at ./dataset/shuffle-word-245-count.jsonl\n", - "Generated a single JSONL file with 352 samples (50 token repeat) - 325 max words - at ./dataset/shuffle-word-325-count.jsonl\n", - "Generated JSONL file with - 470 max words, 50 samples - at ./dataset/gen-word-470-count.jsonl\n", - "Generated JSONL file with - 725 max words, 50 samples - at ./dataset/gen-word-725-count.jsonl\n", - "Generated a single JSONL file with 748 samples (50 token repeat) - 155 max words - at ./dataset/shuffle-word-155-count.jsonl\n", - "Generated a single JSONL 
file with 456 samples (50 token repeat) - 295 max words - at ./dataset/shuffle-word-295-count.jsonl\n", - "Generated a single JSONL file with 456 samples (50 token repeat) - 285 max words - at ./dataset/shuffle-word-285-count.jsonl\n", - "Generated a single JSONL file with 458 samples (50 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", - "Generated JSONL file with - 705 max words, 50 samples - at ./dataset/gen-word-705-count.jsonl\n", - "Generated a single JSONL file with 353 samples (50 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", - "Generated JSONL file with - 415 max words, 50 samples - at ./dataset/gen-word-415-count.jsonl\n", - "Generated a single JSONL file with 290 samples (50 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", - "Generated JSONL file with - 465 max words, 50 samples - at ./dataset/gen-word-465-count.jsonl\n", - "Generated JSONL file with - 450 max words, 50 samples - at ./dataset/gen-word-450-count.jsonl\n", - "Generated JSONL file with - 670 max words, 50 samples - at ./dataset/gen-word-670-count.jsonl\n", - "Generated JSONL file with - 505 max words, 50 samples - at ./dataset/gen-word-505-count.jsonl\n", - "Generated a single JSONL file with 353 samples (50 token repeat) - 355 max words - at ./dataset/shuffle-word-355-count.jsonl\n", - "Generated JSONL file with - 560 max words, 50 samples - at ./dataset/gen-word-560-count.jsonl\n", - "Generated JSONL file with - 460 max words, 50 samples - at ./dataset/gen-word-460-count.jsonl\n", - "Generated a single JSONL file with 352 samples (50 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", - "Generated a single JSONL file with 293 samples (50 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", - "Generated a single JSONL file with 351 samples (50 token repeat) - 395 max words - at ./dataset/shuffle-word-395-count.jsonl\n", - "Generated JSONL file with - 455 max words, 50 samples - at ./dataset/gen-word-455-count.jsonl\n", - "Generated a single JSONL file with 295 samples (50 token repeat) - 445 max words - at ./dataset/shuffle-word-445-count.jsonl\n", - "Generated JSONL file with - 710 max words, 50 samples - at ./dataset/gen-word-710-count.jsonl\n", - "Generated a single JSONL file with 296 samples (50 token repeat) - 435 max words - at ./dataset/shuffle-word-435-count.jsonl\n", - "Generated JSONL file with - 715 max words, 50 samples - at ./dataset/gen-word-715-count.jsonl\n", - "Generated a single JSONL file with 249 samples (50 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", - "Generated JSONL file with - 735 max words, 50 samples - at ./dataset/gen-word-735-count.jsonl\n", - "Generated a single JSONL file with 299 samples (50 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", - "Generated JSONL file with - 660 max words, 50 samples - at ./dataset/gen-word-660-count.jsonl\n", - "Generated JSONL file with - 485 max words, 50 samples - at ./dataset/gen-word-485-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 615 max words - at ./dataset/shuffle-word-615-count.jsonl\n", - "Generated JSONL file with - 910 max words, 50 samples - at ./dataset/gen-word-910-count.jsonl\n", - "Generated a single JSONL file with 299 samples (50 token repeat) - 405 max words - at ./dataset/shuffle-word-405-count.jsonl\n", - "Generated a single JSONL file with 199 samples (50 token repeat) - 755 max words - 
at ./dataset/shuffle-word-755-count.jsonl\n", - "Generated JSONL file with - 665 max words, 50 samples - at ./dataset/gen-word-665-count.jsonl\n", - "Generated JSONL file with - 480 max words, 50 samples - at ./dataset/gen-word-480-count.jsonl\n", - "Generated JSONL file with - 565 max words, 50 samples - at ./dataset/gen-word-565-count.jsonl\n", - "Generated JSONL file with - 690 max words, 50 samples - at ./dataset/gen-word-690-count.jsonl\n", - "Generated a single JSONL file with 199 samples (50 token repeat) - 745 max words - at ./dataset/shuffle-word-745-count.jsonl\n", - "Generated JSONL file with - 590 max words, 50 samples - at ./dataset/gen-word-590-count.jsonl\n", - "Generated a single JSONL file with 291 samples (50 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", - "Generated a single JSONL file with 294 samples (50 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", - "Generated a single JSONL file with 298 samples (50 token repeat) - 425 max words - at ./dataset/shuffle-word-425-count.jsonl\n", - "Generated a single JSONL file with 249 samples (50 token repeat) - 575 max words - at ./dataset/shuffle-word-575-count.jsonl\n", - "Generated JSONL file with - 825 max words, 50 samples - at ./dataset/gen-word-825-count.jsonl\n", - "Generated JSONL file with - 785 max words, 50 samples - at ./dataset/gen-word-785-count.jsonl\n", - "Generated JSONL file with - 770 max words, 50 samples - at ./dataset/gen-word-770-count.jsonl\n", - "Generated a single JSONL file with 296 samples (50 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonl\n", - "Generated a single JSONL file with 199 samples (50 token repeat) - 725 max words - at ./dataset/shuffle-word-725-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 595 max words - at ./dataset/shuffle-word-595-count.jsonl\n", - "Generated JSONL file with - 760 max words, 50 samples - at ./dataset/gen-word-760-count.jsonl\n", - "Generated a single JSONL file with 295 samples (50 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", - "Generated a single JSONL file with 202 samples (50 token repeat) - 635 max words - at ./dataset/shuffle-word-635-count.jsonl\n", - "Generated JSONL file with - 810 max words, 50 samples - at ./dataset/gen-word-810-count.jsonl\n", - "Generated JSONL file with - 855 max words, 50 samples - at ./dataset/gen-word-855-count.jsonl\n", - "Generated a single JSONL file with 201 samples (50 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", - "Generated JSONL file with - 595 max words, 50 samples - at ./dataset/gen-word-595-count.jsonl\n", - "Generated JSONL file with - 900 max words, 50 samples - at ./dataset/gen-word-900-count.jsonl\n", - "Generated JSONL file with - 625 max words, 50 samples - at ./dataset/gen-word-625-count.jsonl\n", - "Generated a single JSONL file with 199 samples (50 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", - "Generated a single JSONL file with 458 samples (50 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", - "Generated JSONL file with - 685 max words, 50 samples - at ./dataset/gen-word-685-count.jsonl\n", - "Generated JSONL file with - 800 max words, 50 samples - at ./dataset/gen-word-800-count.jsonl\n", - "Generated JSONL file with - 635 max words, 50 samples - at ./dataset/gen-word-635-count.jsonl\n", - "Generated a single JSONL file with 199 samples (50 token repeat) - 715 
max words - at ./dataset/shuffle-word-715-count.jsonl\n", - "Generated a single JSONL file with 248 samples (50 token repeat) - 565 max words - at ./dataset/shuffle-word-565-count.jsonl\n", - "Generated JSONL file with - 550 max words, 50 samples - at ./dataset/gen-word-550-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 505 max words - at ./dataset/shuffle-word-505-count.jsonl\n", - "Generated a single JSONL file with 352 samples (50 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", - "Generated JSONL file with - 575 max words, 50 samples - at ./dataset/gen-word-575-count.jsonl\n", - "Generated a single JSONL file with 201 samples (50 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", - "Generated JSONL file with - 815 max words, 50 samples - at ./dataset/gen-word-815-count.jsonl\n", - "Generated a single JSONL file with 249 samples (50 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", - "Generated JSONL file with - 895 max words, 50 samples - at ./dataset/gen-word-895-count.jsonl\n", - "Generated a single JSONL file with 249 samples (50 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", - "Generated JSONL file with - 750 max words, 50 samples - at ./dataset/gen-word-750-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 585 max words - at ./dataset/shuffle-word-585-count.jsonl\n", - "Generated a single JSONL file with 295 samples (50 token repeat) - 495 max words - at ./dataset/shuffle-word-495-count.jsonl\n", - "Generated a single JSONL file with 249 samples (50 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 685 max words - at ./dataset/shuffle-word-685-count.jsonl\n", - "Generated JSONL file with - 920 max words, 50 samples - at ./dataset/gen-word-920-count.jsonl\n", - "Generated JSONL file with - 740 max words, 50 samples - at ./dataset/gen-word-740-count.jsonl\n", - "Generated a single JSONL file with 249 samples (50 token repeat) - 600 max words - at ./dataset/shuffle-word-600-count.jsonl\n", - "Generated JSONL file with - 905 max words, 50 samples - at ./dataset/gen-word-905-count.jsonl\n", - "Generated JSONL file with - 680 max words, 50 samples - at ./dataset/gen-word-680-count.jsonl\n", - "Generated a single JSONL file with 353 samples (50 token repeat) - 335 max words - at ./dataset/shuffle-word-335-count.jsonl\n", - "Generated JSONL file with - 695 max words, 50 samples - at ./dataset/gen-word-695-count.jsonl\n", - "Generated JSONL file with - 720 max words, 50 samples - at ./dataset/gen-word-720-count.jsonl\n", - "Generated a single JSONL file with 290 samples (50 token repeat) - 465 max words - at ./dataset/shuffle-word-465-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", - "Generated a single JSONL file with 294 samples (50 token repeat) - 485 max words - at ./dataset/shuffle-word-485-count.jsonl\n", - "Generated JSONL file with - 700 max words, 50 samples - at ./dataset/gen-word-700-count.jsonl\n", - "Generated a single JSONL file with 249 samples (50 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonl\n", - "Generated JSONL file with - 730 max words, 50 samples - at ./dataset/gen-word-730-count.jsonl\n", - "Generated JSONL file with - 780 max words, 50 samples - at
./dataset/gen-word-780-count.jsonl\n", - "Generated JSONL file with - 765 max words, 50 samples - at ./dataset/gen-word-765-count.jsonl\n", - "Generated JSONL file with - 1020 max words, 50 samples - at ./dataset/gen-word-1020-count.jsonl\n", - "Generated JSONL file with - 930 max words, 50 samples - at ./dataset/gen-word-930-count.jsonl\n", - "Generated JSONL file with - 935 max words, 50 samples - at ./dataset/gen-word-935-count.jsonl\n", - "Generated a single JSONL file with 159 samples (50 token repeat) - 805 max words - at ./dataset/shuffle-word-805-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 545 max words - at ./dataset/shuffle-word-545-count.jsonl\n", - "Generated JSONL file with - 1100 max words, 50 samples - at ./dataset/gen-word-1100-count.jsonl\n", - "Generated a single JSONL file with 154 samples (50 token repeat) - 815 max words - at ./dataset/shuffle-word-815-count.jsonl\n", - "Generated a single JSONL file with 163 samples (50 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 905 max words - at ./dataset/shuffle-word-905-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", - "\n", - "Generated JSONL file with - 835 max words, 50 samples - at ./dataset/gen-word-835-count.jsonl\n", - "Generated a single JSONL file with 351 samples (50 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", - "Generated JSONL file with - 1120 max words, 50 samples - at ./dataset/gen-word-1120-count.jsonl\n", - "Generated JSONL file with - 840 max words, 50 samples - at ./dataset/gen-word-840-count.jsonl\n", - "Generated a single JSONL file with 357 samples (50 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", - "Generated JSONL file with - 675 max words, 50 samples - at ./dataset/gen-word-675-count.jsonl\n", - "Generated a single JSONL file with 203 samples (50 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", - "Generated a single JSONL file with 203 samples (50 token repeat) - 655 max words - at ./dataset/shuffle-word-655-count.jsonl\n", - "Generated a single JSONL file with 349 samples (50 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", - "Generated JSONL file with - 745 max words, 50 samples - at ./dataset/gen-word-745-count.jsonl\n", - "Generated JSONL file with - 755 max words, 50 samples - at ./dataset/gen-word-755-count.jsonl\n", - "Generated a single JSONL file with 157 samples (50 token repeat) - 845 max words - at ./dataset/shuffle-word-845-count.jsonl\n", - "Generated a single JSONL file with 197 samples (50 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", - "Generated JSONL file with - 875 max words, 50 samples - at ./dataset/gen-word-875-count.jsonl\n", - "Generated JSONL file with - 965 max words, 50 samples - at ./dataset/gen-word-965-count.jsonl\n", - "Generated JSONL file with - 820 max words, 50 samples - at ./dataset/gen-word-820-count.jsonl\n", - "Generated JSONL file with - 1055 max words, 50 samples - at ./dataset/gen-word-1055-count.jsonl\n", - "Generated a single JSONL file with 351 samples (50 token repeat) - 365 max words - at ./dataset/shuffle-word-365-count.jsonl\n", - "Generated JSONL file with - 955 max words, 50 samples - at ./dataset/gen-word-955-count.jsonl\n", - "Generated a single JSONL file 
with 200 samples (50 token repeat) - 625 max words - at ./dataset/shuffle-word-625-count.jsonl\n", - "Generated a single JSONL file with 148 samples (50 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", - "Generated JSONL file with - 790 max words, 50 samples - at ./dataset/gen-word-790-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonl\n", - "Generated JSONL file with - 850 max words, 50 samples - at ./dataset/gen-word-850-count.jsonl\n", - "Generated JSONL file with - 885 max words, 50 samples - at ./dataset/gen-word-885-count.jsonl\n", - "Generated JSONL file with - 1060 max words, 50 samples - at ./dataset/gen-word-1060-count.jsonl\n", - "Generated JSONL file with - 1005 max words, 50 samples - at ./dataset/gen-word-1005-count.jsonl\n", - "Generated JSONL file with - 805 max words, 50 samples - at ./dataset/gen-word-805-count.jsonl\n", - "Generated a single JSONL file with 161 samples (50 token repeat) - 830 max words - at ./dataset/shuffle-word-830-count.jsonl\n", - "Generated JSONL file with - 1105 max words, 50 samples - at ./dataset/gen-word-1105-count.jsonl\n", - "Generated JSONL file with - 1040 max words, 50 samples - at ./dataset/gen-word-1040-count.jsonl\n", - "Generated JSONL file with - 845 max words, 50 samples - at ./dataset/gen-word-845-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", - "Generated a single JSONL file with 349 samples (50 token repeat) - 375 max words - at ./dataset/shuffle-word-375-count.jsonl\n", - "Generated JSONL file with - 795 max words, 50 samples - at ./dataset/gen-word-795-count.jsonl\n", - "Generated a single JSONL file with 351 samples (50 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", - "Generated JSONL file with - 775 max words, 50 samples - at ./dataset/gen-word-775-count.jsonl\n", - "Generated a single JSONL file with 354 samples (50 token repeat) - 315 max words - at ./dataset/shuffle-word-315-count.jsonl\n", - "Generated JSONL file with - 830 max words, 50 samples - at ./dataset/gen-word-830-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1015 max words - at ./dataset/shuffle-word-1015-count.jsonl\n", - "Generated JSONL file with - 1085 max words, 50 samples - at ./dataset/gen-word-1085-count.jsonl\n", - "Generated JSONL file with - 1045 max words, 50 samples - at ./dataset/gen-word-1045-count.jsonl\n", - "Generated a single JSONL file with 154 samples (50 token repeat) - 895 max words - at ./dataset/shuffle-word-895-count.jsonl\n", - "Generated a single JSONL file with 351 samples (50 token repeat) - 390 max words - at ./dataset/shuffle-word-390-count.jsonl\n", - "Generated a single JSONL file with 350 samples (50 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", - "Generated a single JSONL file with 199 samples (50 token repeat) - 705 max words - at ./dataset/shuffle-word-705-count.jsonl\n", - "Generated JSONL file with - 980 max words, 50 samples - at ./dataset/gen-word-980-count.jsonl\n", - "Generated JSONL file with - 1080 max words, 50 samples - at ./dataset/gen-word-1080-count.jsonl\n", - "Generated JSONL file with - 1155 max words, 50 samples - at ./dataset/gen-word-1155-count.jsonl\n", - "Generated JSONL file with - 1090 max words, 50 samples - at ./dataset/gen-word-1090-count.jsonl\n", - "Generated a single JSONL 
file with 295 samples (50 token repeat) - 455 max words - at ./dataset/shuffle-word-455-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 515 max words - at ./dataset/shuffle-word-515-count.jsonl\n", - "Generated JSONL file with - 860 max words, 50 samples - at ./dataset/gen-word-860-count.jsonl\n", - "Generated JSONL file with - 985 max words, 50 samples - at ./dataset/gen-word-985-count.jsonl\n", - "Generated a single JSONL file with 157 samples (50 token repeat) - 885 max words - at ./dataset/shuffle-word-885-count.jsonl\n", - "Generated a single JSONL file with 202 samples (50 token repeat) - 645 max words - at ./dataset/shuffle-word-645-count.jsonl\n", - "Generated JSONL file with - 950 max words, 50 samples - at ./dataset/gen-word-950-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1005 max words - at ./dataset/shuffle-word-1005-count.jsonl\n", - "Generated JSONL file with - 870 max words, 50 samples - at ./dataset/gen-word-870-count.jsonl\n", - "Generated a single JSONL file with 198 samples (50 token repeat) - 785 max words - at ./dataset/shuffle-word-785-count.jsonl\n", - "Generated JSONL file with - 1035 max words, 50 samples - at ./dataset/gen-word-1035-count.jsonl\n", - "Generated JSONL file with - 1185 max words, 50 samples - at ./dataset/gen-word-1185-count.jsonl\n", - "Generated a single JSONL file with 248 samples (50 token repeat) - 525 max words - at ./dataset/shuffle-word-525-count.jsonl\n", - "Generated JSONL file with - 925 max words, 50 samples - at ./dataset/gen-word-925-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 555 max words - at ./dataset/shuffle-word-555-count.jsonl\n", - "Generated JSONL file with - 1195 max words, 50 samples - at ./dataset/gen-word-1195-count.jsonl\n", - "Generated a single JSONL file with 160 samples (50 token repeat) - 855 max words - at ./dataset/shuffle-word-855-count.jsonl\n", - "Generated a single JSONL file with 295 samples (50 token repeat) - 475 max words - at ./dataset/shuffle-word-475-count.jsonl\n", - "Generated JSONL file with - 1115 max words, 50 samples - at ./dataset/gen-word-1115-count.jsonl\n", - "Generated JSONL file with - 1015 max words, 50 samples - at ./dataset/gen-word-1015-count.jsonl\n", - "Generated JSONL file with - 1220 max words, 50 samples - at ./dataset/gen-word-1220-count.jsonl\n", - "Generated JSONL file with - 1175 max words, 50 samples - at ./dataset/gen-word-1175-count.jsonl\n", - "Generated JSONL file with - 865 max words, 50 samples - at ./dataset/gen-word-865-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1105 max words - at ./dataset/shuffle-word-1105-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", - "Generated a single JSONL file with 201 samples (50 token repeat) - 675 max words - at ./dataset/shuffle-word-675-count.jsonl\n", - "Generated JSONL file with - 1140 max words, 50 samples - at ./dataset/gen-word-1140-count.jsonl\n", - "Generated a single JSONL file with 203 samples (50 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", - "Generated JSONL file with - 880 max words, 50 samples - at ./dataset/gen-word-880-count.jsonl\n", - "Generated a single JSONL file with 157 samples (50 token repeat) - 835 max words - at ./dataset/shuffle-word-835-count.jsonl\n", - "Generated JSONL file with - 940 max words, 50 samples - at 
./dataset/gen-word-940-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1075 max words - at ./dataset/shuffle-word-1075-count.jsonl\n", - "Generated JSONL file with - 890 max words, 50 samples - at ./dataset/gen-word-890-count.jsonl\n", - "Generated a single JSONL file with 159 samples (50 token repeat) - 825 max words - at ./dataset/shuffle-word-825-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", - "Generated a single JSONL file with 300 samples (50 token repeat) - 415 max words - at ./dataset/shuffle-word-415-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1135 max words - at ./dataset/shuffle-word-1135-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", - "Generated JSONL file with - 995 max words, 50 samples - at ./dataset/gen-word-995-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 765 max words - at ./dataset/shuffle-word-765-count.jsonl\n", - "Generated a single JSONL file with 201 samples (50 token repeat) - 665 max words - at ./dataset/shuffle-word-665-count.jsonl\n", - "Generated JSONL file with - 1025 max words, 50 samples - at ./dataset/gen-word-1025-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1085 max words - at ./dataset/shuffle-word-1085-count.jsonl\n", - "Generated a single JSONL file with 201 samples (50 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", - "Generated a single JSONL file with 160 samples (50 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", - "Generated JSONL file with - 915 max words, 50 samples - at ./dataset/gen-word-915-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", - "Generated a single JSONL file with 149 samples (50 token repeat) - 1185 max words - at ./dataset/shuffle-word-1185-count.jsonl\n", - "Generated a single JSONL file with 161 samples (50 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", - "Generated JSONL file with - 990 max words, 50 samples - at ./dataset/gen-word-990-count.jsonl\n", - "Generated a single JSONL file with 157 samples (50 token repeat) - 875 max words - at ./dataset/shuffle-word-875-count.jsonl\n", - "Generated JSONL file with - 960 max words, 50 samples - at ./dataset/gen-word-960-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 915 max words - at ./dataset/shuffle-word-915-count.jsonl\n", - "Generated JSONL file with - 1250 max words, 50 samples - at ./dataset/gen-word-1250-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", - "Generated JSONL file with - 945 max words, 50 samples - at ./dataset/gen-word-945-count.jsonl\n", - "Generated a single JSONL file with 292 samples 
(50 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", - "Generated JSONL file with - 975 max words, 50 samples - at ./dataset/gen-word-975-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1065 max words - at ./dataset/shuffle-word-1065-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 775 max words - at ./dataset/shuffle-word-775-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 695 max words - at ./dataset/shuffle-word-695-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", - "Generated a single JSONL file with 250 samples (50 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", - "Generated JSONL file with - 1030 max words, 50 samples - at ./dataset/gen-word-1030-count.jsonl\n", - "Generated JSONL file with - 1050 max words, 50 samples - at ./dataset/gen-word-1050-count.jsonl\n", - "Generated JSONL file with - 1170 max words, 50 samples - at ./dataset/gen-word-1170-count.jsonl\n", - "Generated JSONL file with - 970 max words, 50 samples - at ./dataset/gen-word-970-count.jsonl\n", - "Generated JSONL file with - 1365 max words, 50 samples - at ./dataset/gen-word-1365-count.jsonl\n", - "Generated a single JSONL file with 149 samples (50 token repeat) - 1195 max words - at ./dataset/shuffle-word-1195-count.jsonl\n", - "Generated JSONL file with - 1130 max words, 50 samples - at ./dataset/gen-word-1130-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", - "Generated a single JSONL file with 162 samples (50 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", - "Generated JSONL file with - 1255 max words, 50 samples - at ./dataset/gen-word-1255-count.jsonl\n", - "Generated JSONL file with - 1110 max words, 50 samples - at ./dataset/gen-word-1110-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", - "Generated JSONL file with - 1230 max words, 50 samples - at ./dataset/gen-word-1230-count.jsonl\n", - "Generated JSONL file with - 1290 max words, 50 samples - at ./dataset/gen-word-1290-count.jsonl\n", - "Generated a single JSONL file with 123 samples (50 token repeat) - 1285 max words - at ./dataset/shuffle-word-1285-count.jsonl\n", - "Generated a single JSONL file with 199 samples (50 token repeat) - 735 max words - at ./dataset/shuffle-word-735-count.jsonl\n", - "Generated JSONL file with - 1135 max words, 50 samples - at ./dataset/gen-word-1135-count.jsonl\n", - "Generated JSONL file with - 1095 max words, 50 samples - at ./dataset/gen-word-1095-count.jsonl\n", - "Generated a single JSONL file with 159 samples (50 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1190 max words - at ./dataset/shuffle-word-1190-count.jsonl\n", - "Generated 
a single JSONL file with 197 samples (50 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 955 max words - at ./dataset/shuffle-word-955-count.jsonl\n", - "Generated a single JSONL file with 201 samples (50 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", - "Generated a single JSONL file with 159 samples (50 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", - "Generated JSONL file with - 1355 max words, 50 samples - at ./dataset/gen-word-1355-count.jsonl\n", - "Generated JSONL file with - 1070 max words, 50 samples - at ./dataset/gen-word-1070-count.jsonl\n", - "Generated JSONL file with - 1065 max words, 50 samples - at ./dataset/gen-word-1065-count.jsonl\n", - "Generated JSONL file with - 1000 max words, 50 samples - at ./dataset/gen-word-1000-count.jsonl\n", - "Generated JSONL file with - 1240 max words, 50 samples - at ./dataset/gen-word-1240-count.jsonl\n", - "Generated a single JSONL file with 160 samples (50 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", - "Generated a single JSONL file with 201 samples (50 token repeat) - 605 max words - at ./dataset/shuffle-word-605-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", - "Generated JSONL file with - 1145 max words, 50 samples - at ./dataset/gen-word-1145-count.jsonl\n", - "Generated JSONL file with - 1125 max words, 50 samples - at ./dataset/gen-word-1125-count.jsonl\n", - "Generated JSONL file with - 1370 max words, 50 samples - at ./dataset/gen-word-1370-count.jsonl\n", - "Generated a single JSONL file with 202 samples (50 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", - "Generated a single JSONL file with 123 samples (50 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1315 max words - at ./dataset/shuffle-word-1315-count.jsonl\n", - "Generated JSONL file with - 1150 max words, 50 samples - at ./dataset/gen-word-1150-count.jsonl\n", - "Generated JSONL file with - 1010 max words, 50 samples - at ./dataset/gen-word-1010-count.jsonl\n", - "Generated a single JSONL file with 159 samples (50 token repeat) - 865 max words - at ./dataset/shuffle-word-865-count.jsonl\n", - "Generated JSONL file with - 1180 max words, 50 samples - at ./dataset/gen-word-1180-count.jsonl\n", - "Generated JSONL file with - 1315 max words, 50 samples - at ./dataset/gen-word-1315-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", - "Generated JSONL file with - 1275 max words, 50 samples - at ./dataset/gen-word-1275-count.jsonl\n", - "Generated a single JSONL file with 199 samples (50 token repeat) - 795 max words - at ./dataset/shuffle-word-795-count.jsonl\n", - "Generated JSONL file with - 1385 max words, 50 samples - at ./dataset/gen-word-1385-count.jsonl\n", - "Generated JSONL file with - 1165 max words, 50 samples - at ./dataset/gen-word-1165-count.jsonl\n", - "Generated JSONL file with - 1270 max words, 50 samples - at ./dataset/gen-word-1270-count.jsonl\n", - "Generated a single JSONL file with 149 samples (50 token 
repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", - "Generated a single JSONL file with 158 samples (50 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", - "Generated JSONL file with - 1160 max words, 50 samples - at ./dataset/gen-word-1160-count.jsonl\n", - "Generated JSONL file with - 1300 max words, 50 samples - at ./dataset/gen-word-1300-count.jsonl\n", - "Generated a single JSONL file with 149 samples (50 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", - "Generated JSONL file with - 1380 max words, 50 samples - at ./dataset/gen-word-1380-count.jsonl\n", - "Generated JSONL file with - 1190 max words, 50 samples - at ./dataset/gen-word-1190-count.jsonl\n", - "Generated a single JSONL file with 159 samples (50 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 945 max words - at ./dataset/shuffle-word-945-count.jsonl\n", - "Generated JSONL file with - 1375 max words, 50 samples - at ./dataset/gen-word-1375-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 935 max words - at ./dataset/shuffle-word-935-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 925 max words - at ./dataset/shuffle-word-925-count.jsonl\n", - "Generated a single JSONL file with 129 samples (50 token repeat) - 1265 max words - at ./dataset/shuffle-word-1265-count.jsonl\n", - "Generated a single JSONL file with 200 samples (50 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1115 max words - at ./dataset/shuffle-word-1115-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1055 max words - at ./dataset/shuffle-word-1055-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", - "Generated JSONL file with - 1245 max words, 50 samples - at ./dataset/gen-word-1245-count.jsonl\n", - "Generated JSONL file with - 1485 max words, 50 samples - at ./dataset/gen-word-1485-count.jsonl\n", - "Generated a single JSONL file with 128 samples (50 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", - "Generated JSONL file with - 1345 max words, 50 samples - at ./dataset/gen-word-1345-count.jsonl\n", - "Generated a single JSONL file with 122 samples (50 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", - "Generated a single JSONL file with 101 samples (50 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", - "Generated a single JSONL file with 102 samples (50 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", - "Generated a single JSONL file with 198 samples (50 token repeat) - 800 max words - at ./dataset/shuffle-word-800-count.jsonl\n", - "Generated a single JSONL file with 149 samples (50 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", - "Generated a single JSONL file with 149 samples (50 token repeat) - 1125 max words - at 
./dataset/shuffle-word-1125-count.jsonl\n", - "Generated JSONL file with - 1405 max words, 50 samples - at ./dataset/gen-word-1405-count.jsonl\n", - "Generated JSONL file with - 1305 max words, 50 samples - at ./dataset/gen-word-1305-count.jsonl\n", - "Generated a single JSONL file with 101 samples (50 token repeat) - 1345 max words - at ./dataset/shuffle-word-1345-count.jsonl\n", - "Generated a single JSONL file with 104 samples (50 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", - "Generated a single JSONL file with 101 samples (50 token repeat) - 1375 max words - at ./dataset/shuffle-word-1375-count.jsonl\n", - "Generated a single JSONL file with 135 samples (50 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1025 max words - at ./dataset/shuffle-word-1025-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 965 max words - at ./dataset/shuffle-word-965-count.jsonl\n", - "Generated JSONL file with - 1265 max words, 50 samples - at ./dataset/gen-word-1265-count.jsonl\n", - "Generated JSONL file with - 1200 max words, 50 samples - at ./dataset/gen-word-1200-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", - "Generated JSONL file with - 1205 max words, 50 samples - at ./dataset/gen-word-1205-count.jsonl\n", - "Generated JSONL file with - 1210 max words, 50 samples - at ./dataset/gen-word-1210-count.jsonl\n", - "Generated a single JSONL file with 125 samples (50 token repeat) - 1235 max words - at ./dataset/shuffle-word-1235-count.jsonl\n", - "Generated a single JSONL file with 149 samples (50 token repeat) - 1165 max words - at ./dataset/shuffle-word-1165-count.jsonl\n", - "Generated JSONL file with - 1320 max words, 50 samples - at ./dataset/gen-word-1320-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1035 max words - at ./dataset/shuffle-word-1035-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 975 max words - at ./dataset/shuffle-word-975-count.jsonl\n", - "Generated JSONL file with - 1280 max words, 50 samples - at ./dataset/gen-word-1280-count.jsonl\n", - "Generated JSONL file with - 1350 max words, 50 samples - at ./dataset/gen-word-1350-count.jsonl\n", - "Generated a single JSONL file with 102 samples (50 token repeat) - 1325 max words - at ./dataset/shuffle-word-1325-count.jsonl\n", - "Generated JSONL file with - 1225 max words, 50 samples - at ./dataset/gen-word-1225-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1435 max words - at ./dataset/shuffle-word-1435-count.jsonl\n", - "Generated JSONL file with - 1440 max words, 50 samples - at
./dataset/gen-word-1440-count.jsonl\n", - "\n", - "Generated JSONL file with - 1310 max words, 50 samples - at ./dataset/gen-word-1310-count.jsonl\n", - "Generated JSONL file with - 1075 max words, 50 samples - at ./dataset/gen-word-1075-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", - "Generated JSONL file with - 1235 max words, 50 samples - at ./dataset/gen-word-1235-count.jsonl\n", - "Generated a single JSONL file with 119 samples (50 token repeat) - 1245 max words - at ./dataset/shuffle-word-1245-count.jsonl\n", - "\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", - "Generated JSONL file with - 1475 max words, 50 samples - at ./dataset/gen-word-1475-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", - "Generated JSONL file with - 1215 max words, 50 samples - at ./dataset/gen-word-1215-count.jsonl\n", - "Generated JSONL file with - 1470 max words, 50 samples - at ./dataset/gen-word-1470-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1455 max words - at ./dataset/shuffle-word-1455-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", - "Generated a single JSONL file with 121 samples (50 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1095 max words - at ./dataset/shuffle-word-1095-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1175 max words - at ./dataset/shuffle-word-1175-count.jsonl\n", - "Generated a single JSONL file with 127 samples (50 token repeat) - 1205 max words - at ./dataset/shuffle-word-1205-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1355 max words - at ./dataset/shuffle-word-1355-count.jsonl\n", - "Generated JSONL file with - 1435 max words, 50 samples - at ./dataset/gen-word-1435-count.jsonl\n", - "Generated JSONL file with - 1390 max words, 50 samples - at ./dataset/gen-word-1390-count.jsonl\n", - "Generated a single JSONL file with 102 samples (50 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 985 max words - at ./dataset/shuffle-word-985-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", - "Generated JSONL file with - 1395 max words, 50 samples - at ./dataset/gen-word-1395-count.jsonl\n", - "Generated a single JSONL file with 120 samples (50 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", - "\n", - "Generated a single JSONL file
with 150 samples (50 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 995 max words - at ./dataset/shuffle-word-995-count.jsonl\n", - "Generated JSONL file with - 1510 max words, 100 samples - at ./dataset/gen-word-1510-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1465 max words - at ./dataset/shuffle-word-1465-count.jsonl\n", - "Generated a single JSONL file with 123 samples (50 token repeat) - 1295 max words - at ./dataset/shuffle-word-1295-count.jsonl\n", - "Generated JSONL file with - 1330 max words, 50 samples - at ./dataset/gen-word-1330-count.jsonl\n", - "Generated JSONL file with - 1675 max words, 100 samples - at ./dataset/gen-word-1675-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1145 max words - at ./dataset/shuffle-word-1145-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n", - "Generated JSONL file with - 1420 max words, 50 samples - at ./dataset/gen-word-1420-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1045 max words - at ./dataset/shuffle-word-1045-count.jsonl\n", - "Generated JSONL file with - 1600 max words, 100 samples - at ./dataset/gen-word-1600-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1445 max words - at ./dataset/shuffle-word-1445-count.jsonl\n", - "Generated JSONL file with - 1460 max words, 50 samples - at ./dataset/gen-word-1460-count.jsonl\n", - "Generated a single JSONL file with 101 samples (50 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", - "Generated JSONL file with - 1465 max words, 50 samples - at ./dataset/gen-word-1465-count.jsonl\n", - "Generated JSONL file with - 1760 max words, 100 samples - at ./dataset/gen-word-1760-count.jsonl\n", - "Generated JSONL file with - 1335 max words, 50 samples - at ./dataset/gen-word-1335-count.jsonl\n", - "Generated JSONL file with - 1360 max words, 50 samples - at ./dataset/gen-word-1360-count.jsonl\n", - "Generated JSONL file with - 1340 max words, 50 samples - at ./dataset/gen-word-1340-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1495 max words - at ./dataset/shuffle-word-1495-count.jsonl\n", - "Generated JSONL file with - 1410 max words, 50 samples - at ./dataset/gen-word-1410-count.jsonl\n", - "Generated JSONL file with - 1455 max words, 50 samples - at ./dataset/gen-word-1455-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1535 max words - at ./dataset/shuffle-word-1535-count.jsonl\n", - "Generated JSONL file with - 1425 max words, 50 samples - at ./dataset/gen-word-1425-count.jsonl\n", -
"Generated JSONL file with - 1560 max words, 100 samples - at ./dataset/gen-word-1560-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1475 max words - at ./dataset/shuffle-word-1475-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1525 max words - at ./dataset/shuffle-word-1525-count.jsonlGenerated a single JSONL file with 124 samples (50 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", - "\n", - "Generated JSONL file with - 1550 max words, 100 samples - at ./dataset/gen-word-1550-count.jsonl\n", - "\n", - "Generated JSONL file with - 1605 max words, 100 samples - at ./dataset/gen-word-1605-count.jsonl\n", - "Generated a single JSONL file with 101 samples (50 token repeat) - 1365 max words - at ./dataset/shuffle-word-1365-count.jsonl\n", - "Generated JSONL file with - 1645 max words, 100 samples - at ./dataset/gen-word-1645-count.jsonl\n", - "Generated JSONL file with - 1525 max words, 100 samples - at ./dataset/gen-word-1525-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1405 max words - at ./dataset/shuffle-word-1405-count.jsonl\n", - "Generated a single JSONL file with 102 samples (50 token repeat) - 1385 max words - at ./dataset/shuffle-word-1385-count.jsonl\n", - "Generated JSONL file with - 1750 max words, 100 samples - at ./dataset/gen-word-1750-count.jsonl\n", - "Generated JSONL file with - 1635 max words, 100 samples - at ./dataset/gen-word-1635-count.jsonl\n", - "Generated JSONL file with - 1490 max words, 50 samples - at ./dataset/gen-word-1490-count.jsonl\n", - "Generated JSONL file with - 1500 max words, 50 samples - at ./dataset/gen-word-1500-count.jsonl\n", - "Generated a single JSONL file with 127 samples (50 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", - "Generated JSONL file with - 1285 max words, 50 samples - at ./dataset/gen-word-1285-count.jsonl\n", - "Generated JSONL file with - 1610 max words, 100 samples - at ./dataset/gen-word-1610-count.jsonl\n", - "Generated JSONL file with - 1655 max words, 100 samples - at ./dataset/gen-word-1655-count.jsonl\n", - "Generated a single JSONL file with 15669 samples (500 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", - "Generated a single JSONL file with 129 samples (50 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1415 max words - at ./dataset/shuffle-word-1415-count.jsonl\n", - "Generated JSONL file with - 1710 max words, 100 samples - at ./dataset/gen-word-1710-count.jsonl\n", - "Generated JSONL file with - 1505 max words, 100 samples - at ./dataset/gen-word-1505-count.jsonl\n", - "Generated a single JSONL file with 17680 samples (500 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", - "Generated JSONL file with - 1295 max words, 50 samples - at ./dataset/gen-word-1295-count.jsonl\n", - "Generated JSONL file with - 1260 max words, 50 samples - at ./dataset/gen-word-1260-count.jsonl\n", - "Generated JSONL file with - 1540 max words, 100 samples - at ./dataset/gen-word-1540-count.jsonl\n", - "Generated a single JSONL file with 102 samples (50 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", - "Generated JSONL file with - 1480 max 
words, 50 samples - at ./dataset/gen-word-1480-count.jsonl\n", - "Generated a single JSONL file with 150 samples (50 token repeat) - 1155 max words - at ./dataset/shuffle-word-1155-count.jsonl\n", - "Generated JSONL file with - 1700 max words, 100 samples - at ./dataset/gen-word-1700-count.jsonl\n", - "Generated JSONL file with - 1400 max words, 50 samples - at ./dataset/gen-word-1400-count.jsonl\n", - "Generated JSONL file with - 1450 max words, 50 samples - at ./dataset/gen-word-1450-count.jsonl\n", - "Generated JSONL file with - 1715 max words, 100 samples - at ./dataset/gen-word-1715-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1425 max words - at ./dataset/shuffle-word-1425-count.jsonl\n", - "Generated JSONL file with - 1665 max words, 100 samples - at ./dataset/gen-word-1665-count.jsonl\n", - "Generated JSONL file with - 1495 max words, 50 samples - at ./dataset/gen-word-1495-count.jsonl\n", - "Generated JSONL file with - 1585 max words, 100 samples - at ./dataset/gen-word-1585-count.jsonl\n", - "Generated JSONL file with - 1325 max words, 50 samples - at ./dataset/gen-word-1325-count.jsonl\n", - "Generated JSONL file with - 1755 max words, 100 samples - at ./dataset/gen-word-1755-count.jsonl\n", - "Generated JSONL file with - 1530 max words, 100 samples - at ./dataset/gen-word-1530-count.jsonl\n", - "Generated JSONL file with - 1650 max words, 100 samples - at ./dataset/gen-word-1650-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", - "Generated JSONL file with - 1720 max words, 100 samples - at ./dataset/gen-word-1720-count.jsonl\n", - "Generated JSONL file with - 1740 max words, 100 samples - at ./dataset/gen-word-1740-count.jsonl\n", - "Generated JSONL file with - 2050 max words, 100 samples - at ./dataset/gen-word-2050-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n", - "Generated JSONL file with - 1630 max words, 100 samples - at ./dataset/gen-word-1630-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1550 max words - at ./dataset/shuffle-word-1550-count.jsonl\n", - "Generated JSONL file with - 1580 max words, 100 samples - at ./dataset/gen-word-1580-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonl\n", - "Generated JSONL file with - 1925 max words, 100 samples - at ./dataset/gen-word-1925-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1725 max words - at ./dataset/shuffle-word-1725-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", - "Generated JSONL file with - 1845 max words, 100 samples - at ./dataset/gen-word-1845-count.jsonl\n", - "Generated a single JSONL file with 124 samples (50 token repeat) - 1215 max words - at ./dataset/shuffle-word-1215-count.jsonl\n", - 
"Generated a single JSONL file with 200 samples (100 token repeat) - 1755 max words - at ./dataset/shuffle-word-1755-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2135 max words - at ./dataset/shuffle-word-2135-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1650 max words - at ./dataset/shuffle-word-1650-count.jsonl\n", - "Generated a single JSONL file with 118 samples (50 token repeat) - 1275 max words - at ./dataset/shuffle-word-1275-count.jsonl\n", - "Generated JSONL file with - 1730 max words, 100 samples - at ./dataset/gen-word-1730-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1605 max words - at ./dataset/shuffle-word-1605-count.jsonl\n", - "Generated JSONL file with - 1915 max words, 100 samples - at ./dataset/gen-word-1915-count.jsonl\n", - "Generated a single JSONL file with 130 samples (50 token repeat) - 1225 max words - at ./dataset/shuffle-word-1225-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1545 max words - at ./dataset/shuffle-word-1545-count.jsonl\n", - "Generated JSONL file with - 1445 max words, 50 samples - at ./dataset/gen-word-1445-count.jsonlGenerated JSONL file with - 1790 max words, 100 samples - at ./dataset/gen-word-1790-count.jsonl\n", - "\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n", - "Generated a single JSONL file with 133 samples (50 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", - "Generated JSONL file with - 1695 max words, 100 samples - at ./dataset/gen-word-1695-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1505 max words - at ./dataset/shuffle-word-1505-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1565 max words - at ./dataset/shuffle-word-1565-count.jsonl\n", - "Generated JSONL file with - 1895 max words, 100 samples - at ./dataset/gen-word-1895-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n", - "Generated JSONL file with - 1800 max words, 100 samples - at ./dataset/gen-word-1800-count.jsonl\n", - "Generated JSONL file with - 1885 max words, 100 samples - at ./dataset/gen-word-1885-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2235 max words - at ./dataset/shuffle-word-2235-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n", - "Generated JSONL file with - 1780 max words, 100 samples - at ./dataset/gen-word-1780-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1395 max words - at ./dataset/shuffle-word-1395-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1695 max words - at ./dataset/shuffle-word-1695-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1585 max words - at ./dataset/shuffle-word-1585-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token 
repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n", - "Generated a single JSONL file with 148 samples (50 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", - "Generated a single JSONL file with 101 samples (50 token repeat) - 1305 max words - at ./dataset/shuffle-word-1305-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1335 max words - at ./dataset/shuffle-word-1335-count.jsonl\n", - "Generated JSONL file with - 1785 max words, 100 samples - at ./dataset/gen-word-1785-count.jsonl\n", - "Generated JSONL file with - 1775 max words, 100 samples - at ./dataset/gen-word-1775-count.jsonl\n", - "Generated JSONL file with - 2055 max words, 100 samples - at ./dataset/gen-word-2055-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1895 max words - at ./dataset/shuffle-word-1895-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2155 max words - at ./dataset/shuffle-word-2155-count.jsonl\n", - "Generated JSONL file with - 1950 max words, 100 samples - at ./dataset/gen-word-1950-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1635 max words - at ./dataset/shuffle-word-1635-count.jsonl\n", - "Generated JSONL file with - 1570 max words, 100 samples - at ./dataset/gen-word-1570-count.jsonl\n", - "\n", - "Generated JSONL file with - 2150 max words, 100 samples - at ./dataset/gen-word-2150-count.jsonl\n", - "Generated JSONL file with - 1680 max words, 100 samples - at ./dataset/gen-word-1680-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n", - "Generated JSONL file with - 1620 max words, 100 samples - at ./dataset/gen-word-1620-count.jsonl\n", - "Generated a single JSONL file with 121 samples (50 token repeat) - 1255 max words - at ./dataset/shuffle-word-1255-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n", - "Generated JSONL file with - 1590 max words, 100 samples - at ./dataset/gen-word-1590-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n", - "Generated JSONL file with - 2315 max words, 100 samples - at ./dataset/gen-word-2315-count.jsonl\n", - "Generated JSONL file with - 1865 max words, 100 samples - at ./dataset/gen-word-1865-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2045 max words - at ./dataset/shuffle-word-2045-count.jsonl\n", - "Generated JSONL file with - 1820 max words, 100 samples - at ./dataset/gen-word-1820-count.jsonl\n", - "Generated JSONL file with - 2095 max words, 100 samples - at ./dataset/gen-word-2095-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1775 max words - at ./dataset/shuffle-word-1775-count.jsonl\n", - "Generated JSONL file with - 1725 max words, 100 samples - at ./dataset/gen-word-1725-count.jsonl\n", - "Generated JSONL file with - 1935 max words, 100 samples - at ./dataset/gen-word-1935-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n", - "Generated JSONL file with - 1685 max
words, 100 samples - at ./dataset/gen-word-1685-count.jsonl\n", - "Generated JSONL file with - 1690 max words, 100 samples - at ./dataset/gen-word-1690-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1685 max words - at ./dataset/shuffle-word-1685-count.jsonl\n", - "Generated JSONL file with - 1415 max words, 50 samples - at ./dataset/gen-word-1415-count.jsonl\n", - "Generated JSONL file with - 2185 max words, 100 samples - at ./dataset/gen-word-2185-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n", - "Generated JSONL file with - 2100 max words, 100 samples - at ./dataset/gen-word-2100-count.jsonl\n", - "Generated JSONL file with - 1840 max words, 100 samples - at ./dataset/gen-word-1840-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n", - "Generated JSONL file with - 2105 max words, 100 samples - at ./dataset/gen-word-2105-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n", - "Generated JSONL file with - 2010 max words, 100 samples - at ./dataset/gen-word-2010-count.jsonl\n", - "Generated JSONL file with - 2145 max words, 100 samples - at ./dataset/gen-word-2145-count.jsonl\n", - "\n", - "Generated JSONL file with - 1870 max words, 100 samples - at ./dataset/gen-word-1870-count.jsonl\n", - "Generated JSONL file with - 1430 max words, 50 samples - at ./dataset/gen-word-1430-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2035 max words - at ./dataset/shuffle-word-2035-count.jsonl\n", - "Generated JSONL file with - 1520 max words, 100 samples - at ./dataset/gen-word-1520-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n", - "Generated JSONL file with - 1965 max words, 100 samples - at ./dataset/gen-word-1965-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1645 max words - at ./dataset/shuffle-word-1645-count.jsonl\n", - "Generated JSONL file with - 2215 max words, 100 samples - at ./dataset/gen-word-2215-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1825 max words - at ./dataset/shuffle-word-1825-count.jsonl\n", - "Generated JSONL file with - 1660 max words, 100 samples - at ./dataset/gen-word-1660-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2225 max words - at ./dataset/shuffle-word-2225-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1905 max words - at ./dataset/shuffle-word-1905-count.jsonl\n", - "Generated JSONL file with - 1735 max words, 100 samples - at ./dataset/gen-word-1735-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n", - "Generated JSONL file with - 1905 max words, 100 samples - at ./dataset/gen-word-1905-count.jsonl\n", - "\n", - "Generated a single JSONL file with 200 samples (100 token
repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonlGenerated JSONL file with - 2030 max words, 100 samples - at ./dataset/gen-word-2030-count.jsonl\n", + "Generated JSONL file with - 2 max words, 100 samples - at ./dataset/word-2-count.jsonl\n", + "Generated JSONL file with - 10 max words, 100 samples - at ./dataset/gen-word-10-count.jsonl\n", + "Generated JSONL file with - 15 max words, 100 samples - at ./dataset/gen-word-15-count.jsonl\n", + "Generated JSONL file with - 20 max words, 100 samples - at ./dataset/gen-word-20-count.jsonl\n", + "Generated JSONL file with - 25 max words, 100 samples - at ./dataset/gen-word-25-count.jsonl\n", + "Generated JSONL file with - 4 max words, 100 samples - at ./dataset/word-4-count.jsonl\n", + "Generated JSONL file with - 5 max words, 100 samples - at ./dataset/gen-word-5-count.jsonl\n", + "Generated JSONL file with - 55 max words, 100 samples - at ./dataset/gen-word-55-count.jsonl\n", + "Generated JSONL file with - 45 max words, 100 samples - at ./dataset/gen-word-45-count.jsonlGenerated JSONL file with - 50 max words, 100 samples - at ./dataset/gen-word-50-count.jsonl\n", + "Generated JSONL file with - 70 max words, 100 samples - at ./dataset/gen-word-70-count.jsonl\n", "\n", - "Generated JSONL file with - 1880 max words, 100 samples - at ./dataset/gen-word-1880-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1910 max words - at ./dataset/shuffle-word-1910-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n", - "Generated JSONL file with - 1670 max words, 100 samples - at ./dataset/gen-word-1670-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n", - "Generated JSONL file with - 1945 max words, 100 samples - at ./dataset/gen-word-1945-count.jsonl\n", - "Generated JSONL file with - 1855 max words, 100 samples - at ./dataset/gen-word-1855-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2115 max words - at ./dataset/shuffle-word-2115-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1745 max words - at ./dataset/shuffle-word-1745-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonlGenerated a single JSONL file with 199 samples (100 token repeat) - 2385 max words - at ./dataset/shuffle-word-2385-count.jsonl\n", - "Generated JSONL file with - 1805 max words, 100 samples - at ./dataset/gen-word-1805-count.jsonl\n", - "Generated JSONL file with - 1835 max words, 100 samples - at ./dataset/gen-word-1835-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2295 max words - at ./dataset/shuffle-word-2295-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1930 max words - at ./dataset/shuffle-word-1930-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 2275 max words - at ./dataset/shuffle-word-2275-count.jsonl\n", - "Generated a single JSONL file with 181 samples (100 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n", + "Generated JSONL file with - 30 max words, 100 samples - at ./dataset/gen-word-30-count.jsonl\n", + "Generated JSONL file with - 65 max words, 100 samples - at ./dataset/gen-word-65-count.jsonl\n", + "Generated 
JSONL file with - 60 max words, 100 samples - at ./dataset/gen-word-60-count.jsonl\n", + "Generated JSONL file with - 35 max words, 100 samples - at ./dataset/gen-word-35-count.jsonlGenerated JSONL file with - 90 max words, 100 samples - at ./dataset/gen-word-90-count.jsonl\n", "\n", - "Generated a single JSONL file with 188 samples (100 token repeat) - 2465 max words - at ./dataset/shuffle-word-2465-count.jsonl\n", - "Generated JSONL file with - 1900 max words, 100 samples - at ./dataset/gen-word-1900-count.jsonlGenerated JSONL file with - 1615 max words, 100 samples - at ./dataset/gen-word-1615-count.jsonl\n", - "Generated JSONL file with - 1930 max words, 100 samples - at ./dataset/gen-word-1930-count.jsonl\n", + "Generated JSONL file with - 40 max words, 100 samples - at ./dataset/gen-word-40-count.jsonl\n", + "Generated JSONL file with - 80 max words, 100 samples - at ./dataset/gen-word-80-count.jsonl\n", + "Generated JSONL file with - 85 max words, 100 samples - at ./dataset/gen-word-85-count.jsonl\n", + "Generated JSONL file with - 75 max words, 100 samples - at ./dataset/gen-word-75-count.jsonl\n", + "Generated JSONL file with - 120 max words, 75 samples - at ./dataset/gen-word-120-count.jsonl\n", + "Generated JSONL file with - 180 max words, 75 samples - at ./dataset/gen-word-180-count.jsonl\n", + "Generated JSONL file with - 210 max words, 75 samples - at ./dataset/gen-word-210-count.jsonl\n", + "Generated JSONL file with - 130 max words, 75 samples - at ./dataset/gen-word-130-count.jsonl\n", + "Generated JSONL file with - 170 max words, 75 samples - at ./dataset/gen-word-170-count.jsonl\n", + "Generated JSONL file with - 340 max words, 75 samples - at ./dataset/gen-word-340-count.jsonl\n", + "Generated JSONL file with - 150 max words, 75 samples - at ./dataset/gen-word-150-count.jsonl\n", + "Generated JSONL file with - 460 max words, 75 samples - at ./dataset/gen-word-460-count.jsonl\n", + "Generated JSONL file with - 510 max words, 75 samples - at ./dataset/gen-word-510-count.jsonl\n", + "Generated JSONL file with - 95 max words, 100 samples - at ./dataset/gen-word-95-count.jsonl\n", + "Generated JSONL file with - 520 max words, 75 samples - at ./dataset/gen-word-520-count.jsonl\n", + "Generated JSONL file with - 350 max words, 75 samples - at ./dataset/gen-word-350-count.jsonl\n", + "Generated JSONL file with - 430 max words, 75 samples - at ./dataset/gen-word-430-count.jsonl\n", + "Generated JSONL file with - 470 max words, 75 samples - at ./dataset/gen-word-470-count.jsonl\n", + "Generated JSONL file with - 330 max words, 75 samples - at ./dataset/gen-word-330-count.jsonl\n", + "Generated JSONL file with - 100 max words, 100 samples - at ./dataset/gen-word-100-count.jsonl\n", + "Generated JSONL file with - 220 max words, 75 samples - at ./dataset/gen-word-220-count.jsonl\n", + "Generated JSONL file with - 280 max words, 75 samples - at ./dataset/gen-word-280-count.jsonl\n", + "Generated JSONL file with - 240 max words, 75 samples - at ./dataset/gen-word-240-count.jsonl\n", + "Generated JSONL file with - 160 max words, 75 samples - at ./dataset/gen-word-160-count.jsonl\n", + "Generated JSONL file with - 290 max words, 75 samples - at ./dataset/gen-word-290-count.jsonl\n", + "Generated JSONL file with - 200 max words, 75 samples - at ./dataset/gen-word-200-count.jsonl\n", + "Generated JSONL file with - 390 max words, 75 samples - at ./dataset/gen-word-390-count.jsonl\n", + "Generated JSONL file with - 320 max words, 75 samples - at ./dataset/gen-word-320-count.jsonl\n", + 
"Generated JSONL file with - 550 max words, 75 samples - at ./dataset/gen-word-550-count.jsonl\n", + "Generated JSONL file with - 190 max words, 75 samples - at ./dataset/gen-word-190-count.jsonlGenerated JSONL file with - 450 max words, 75 samples - at ./dataset/gen-word-450-count.jsonl\n", "\n", - "Generated JSONL file with - 2495 max words, 100 samples - at ./dataset/gen-word-2495-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1715 max words - at ./dataset/shuffle-word-1715-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1870 max words - at ./dataset/shuffle-word-1870-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1995 max words - at ./dataset/shuffle-word-1995-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1865 max words - at ./dataset/shuffle-word-1865-count.jsonl\n", - "Generated JSONL file with - 2135 max words, 100 samples - at ./dataset/gen-word-2135-count.jsonl\n", - "Generated a single JSONL file with 100 samples (50 token repeat) - 1485 max words - at ./dataset/shuffle-word-1485-count.jsonlGenerated JSONL file with - 1575 max words, 100 samples - at ./dataset/gen-word-1575-count.jsonl\n", - "Generated JSONL file with - 1535 max words, 100 samples - at ./dataset/gen-word-1535-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n", - "Generated JSONL file with - 2120 max words, 100 samples - at ./dataset/gen-word-2120-count.jsonl\n", - "Generated a single JSONL file with 196 samples (100 token repeat) - 2335 max words - at ./dataset/shuffle-word-2335-count.jsonl\n", - "Generated JSONL file with - 1515 max words, 100 samples - at ./dataset/gen-word-1515-count.jsonl\n", - "Generated JSONL file with - 2265 max words, 100 samples - at ./dataset/gen-word-2265-count.jsonl\n", - "Generated a single JSONL file with 151 samples (100 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2125 max words - at ./dataset/shuffle-word-2125-count.jsonl\n", - "Generated JSONL file with - 2005 max words, 100 samples - at ./dataset/gen-word-2005-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1965 max words - at ./dataset/shuffle-word-1965-count.jsonl\n", - "\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2065 max words - at ./dataset/shuffle-word-2065-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2095 max words - at ./dataset/shuffle-word-2095-count.jsonlGenerated JSONL file with - 2060 max words, 100 samples - at ./dataset/gen-word-2060-count.jsonl\n", - "\n", - "Generated JSONL file with - 1975 max words, 100 samples - at ./dataset/gen-word-1975-count.jsonl\n", - "Generated JSONL file with - 2285 max words, 100 samples - at ./dataset/gen-word-2285-count.jsonl\n", - "Generated JSONL file with - 2170 max words, 100 samples - at 
./dataset/gen-word-2170-count.jsonl\n", - "\n", - "Generated JSONL file with - 1595 max words, 100 samples - at ./dataset/gen-word-1595-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1985 max words - at ./dataset/shuffle-word-1985-count.jsonl\n", - "Generated a single JSONL file with 176 samples (100 token repeat) - 2500 max words - at ./dataset/shuffle-word-2500-count.jsonlGenerated JSONL file with - 2175 max words, 100 samples - at ./dataset/gen-word-2175-count.jsonl\n", - "Generated JSONL file with - 2225 max words, 100 samples - at ./dataset/gen-word-2225-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2205 max words - at ./dataset/shuffle-word-2205-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 2075 max words - at ./dataset/shuffle-word-2075-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n", - "Generated JSONL file with - 2230 max words, 100 samples - at ./dataset/gen-word-2230-count.jsonl\n", - "Generated JSONL file with - 1810 max words, 100 samples - at ./dataset/gen-word-1810-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n", - "Generated a single JSONL file with 197 samples (100 token repeat) - 2365 max words - at ./dataset/shuffle-word-2365-count.jsonl\n", - "\n", - "Generated a single JSONL file with 115 samples (100 token repeat) - 2665 max words - at ./dataset/shuffle-word-2665-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonlGenerated JSONL file with - 2065 max words, 100 samples - at ./dataset/gen-word-2065-count.jsonl\n", - "Generated JSONL file with - 2380 max words, 100 samples - at ./dataset/gen-word-2380-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n", - "Generated JSONL file with - 1795 max words, 100 samples - at ./dataset/gen-word-1795-count.jsonl\n", - "Generated JSONL file with - 1565 max words, 100 samples - at ./dataset/gen-word-1565-count.jsonl\n", - "\n", - "\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1935 max words - at ./dataset/shuffle-word-1935-count.jsonl\n", - "Generated JSONL file with - 2515 max words, 100 samples - at ./dataset/gen-word-2515-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n", - "Generated JSONL file with - 1815 max words, 100 samples - at ./dataset/gen-word-1815-count.jsonl\n", - "Generated JSONL file with - 2245 max words, 100 samples - at ./dataset/gen-word-2245-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2280 max words - at 
./dataset/shuffle-word-2280-count.jsonl\n", - "Generated JSONL file with - 2305 max words, 100 samples - at ./dataset/gen-word-2305-count.jsonl\n", - "Generated JSONL file with - 2350 max words, 100 samples - at ./dataset/gen-word-2350-count.jsonl\n", - "\n", - "Generated JSONL file with - 1640 max words, 100 samples - at ./dataset/gen-word-1640-count.jsonl\n", - "\n", - "Generated JSONL file with - 2340 max words, 100 samples - at ./dataset/gen-word-2340-count.jsonlGenerated a single JSONL file with 184 samples (100 token repeat) - 2475 max words - at ./dataset/shuffle-word-2475-count.jsonl\n", - "\n", - "\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2345 max words - at ./dataset/shuffle-word-2345-count.jsonl\n", - "Generated JSONL file with - 2295 max words, 100 samples - at ./dataset/gen-word-2295-count.jsonl\n", - "Generated JSONL file with - 1705 max words, 100 samples - at ./dataset/gen-word-1705-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 1885 max words - at ./dataset/shuffle-word-1885-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n", - "Generated JSONL file with - 2195 max words, 100 samples - at ./dataset/gen-word-2195-count.jsonl\n", - "Generated JSONL file with - 2255 max words, 100 samples - at ./dataset/gen-word-2255-count.jsonl\n", - "Generated JSONL file with - 2260 max words, 100 samples - at ./dataset/gen-word-2260-count.jsonl\n", - "Generated a single JSONL file with 192 samples (100 token repeat) - 2425 max words - at ./dataset/shuffle-word-2425-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2195 max words - at ./dataset/shuffle-word-2195-count.jsonl\n", - "Generated JSONL file with - 1555 max words, 100 samples - at ./dataset/gen-word-1555-count.jsonl\n", - "Generated JSONL file with - 2545 max words, 100 samples - at ./dataset/gen-word-2545-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1625 max words - at ./dataset/shuffle-word-1625-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1595 max words - at ./dataset/shuffle-word-1595-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n", - "Generated JSONL file with - 2045 max words, 100 samples - at ./dataset/gen-word-2045-count.jsonl\n", - "Generated JSONL file with - 1910 max words, 100 samples - at ./dataset/gen-word-1910-count.jsonl\n", - "Generated JSONL file with - 2400 max words, 100 samples - at ./dataset/gen-word-2400-count.jsonl\n", - "\n", - "\n", - "Generated JSONL file with - 2025 max words, 100 samples - at ./dataset/gen-word-2025-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1925 max words - at ./dataset/shuffle-word-1925-count.jsonl\n", - "Generated a single JSONL file with 184 samples (100 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n", - "Generated JSONL file with - 2695 max words, 100 samples - at ./dataset/gen-word-2695-count.jsonl\n", - "Generated JSONL file with - 1940 max words, 100 samples - at ./dataset/gen-word-1940-count.jsonl\n", - "Generated JSONL file with - 1850 max words, 100 samples - at 
./dataset/gen-word-1850-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2175 max words - at ./dataset/shuffle-word-2175-count.jsonl\n", - "Generated JSONL file with - 1875 max words, 100 samples - at ./dataset/gen-word-1875-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n", - "Generated JSONL file with - 1765 max words, 100 samples - at ./dataset/gen-word-1765-count.jsonl\n", - "Generated JSONL file with - 1745 max words, 100 samples - at ./dataset/gen-word-1745-count.jsonl\n", - "Generated a single JSONL file with 190 samples (100 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1705 max words - at ./dataset/shuffle-word-1705-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2085 max words - at ./dataset/shuffle-word-2085-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 1805 max words - at ./dataset/shuffle-word-1805-count.jsonl\n", + "Generated JSONL file with - 540 max words, 75 samples - at ./dataset/gen-word-540-count.jsonl\n", + "Generated a single JSONL file with 1290 samples (75 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", + "Generated JSONL file with - 560 max words, 75 samples - at ./dataset/gen-word-560-count.jsonl\n", + "Generated JSONL file with - 600 max words, 75 samples - at ./dataset/gen-word-600-count.jsonl\n", + "Generated JSONL file with - 440 max words, 75 samples - at ./dataset/gen-word-440-count.jsonl\n", + "Generated JSONL file with - 360 max words, 75 samples - at ./dataset/gen-word-360-count.jsonl\n", + "Generated a single JSONL file with 689 samples (75 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", + "Generated JSONL file with - 570 max words, 75 samples - at ./dataset/gen-word-570-count.jsonl\n", + "Generated a single JSONL file with 1387 samples (75 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", + "Generated JSONL file with - 530 max words, 75 samples - at ./dataset/gen-word-530-count.jsonl\n", + "Generated JSONL file with - 380 max words, 75 samples - at ./dataset/gen-word-380-count.jsonl\n", + "Generated a single JSONL file with 1155 samples (75 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", + "Generated JSONL file with - 490 max words, 75 samples - at ./dataset/gen-word-490-count.jsonl\n", + "Generated JSONL file with - 270 max words, 75 samples - at ./dataset/gen-word-270-count.jsonl\n", + "Generated JSONL file with - 620 max words, 75 samples - at ./dataset/gen-word-620-count.jsonl\n", + "Generated a single JSONL file with 438 samples (75 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", + "Generated a single JSONL file with 694 samples (75 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", + "Generated a single JSONL file with 1025 samples (75 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", + "Generated a single JSONL file with 1099 samples (75 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", + "Generated a single JSONL file with 1058 samples (75 token repeat) - 170 max words - at 
./dataset/shuffle-word-170-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", + "Generated JSONL file with - 780 max words, 75 samples - at ./dataset/gen-word-780-count.jsonl\n", + "Generated JSONL file with - 820 max words, 75 samples - at ./dataset/gen-word-820-count.jsonl\n", + "Generated JSONL file with - 480 max words, 75 samples - at ./dataset/gen-word-480-count.jsonl\n", + "Generated JSONL file with - 260 max words, 75 samples - at ./dataset/gen-word-260-count.jsonl\n", + "Generated a single JSONL file with 754 samples (75 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", + "Generated a single JSONL file with 685 samples (75 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", + "Generated a single JSONL file with 2635 samples (100 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", + "Generated a single JSONL file with 442 samples (75 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", + "Generated a single JSONL file with 530 samples (75 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", + "Generated a single JSONL file with 527 samples (75 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", + "Generated JSONL file with - 250 max words, 75 samples - at ./dataset/gen-word-250-count.jsonl\n", + "Generated JSONL file with - 960 max words, 75 samples - at ./dataset/gen-word-960-count.jsonl\n", + "Generated a single JSONL file with 443 samples (75 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", + "Generated a single JSONL file with 446 samples (75 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", + "Generated JSONL file with - 370 max words, 75 samples - at ./dataset/gen-word-370-count.jsonl\n", + "Generated a single JSONL file with 530 samples (75 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", + "Generated JSONL file with - 310 max words, 75 samples - at ./dataset/gen-word-310-count.jsonl\n", + "Generated a single JSONL file with 3558 samples (100 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", + "Generated a single JSONL file with 2945 samples (100 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", + "Generated a single JSONL file with 443 samples (75 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", + "Generated JSONL file with - 110 max words, 75 samples - at ./dataset/gen-word-110-count.jsonl\n", + "Generated JSONL file with - 640 max words, 75 samples - at ./dataset/gen-word-640-count.jsonl\n", + "Generated a single JSONL file with 525 samples (75 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", + "Generated JSONL file with - 230 max words, 75 samples - at ./dataset/gen-word-230-count.jsonl\n", + "Generated JSONL file with - 590 max words, 75 samples - at ./dataset/gen-word-590-count.jsonl\n", + "Generated a single JSONL file with 1030 samples (75 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", + "Generated JSONL file with - 660 max words, 75 samples - at ./dataset/gen-word-660-count.jsonl\n", + "Generated JSONL file with - 580 max words, 75 samples - at 
./dataset/gen-word-580-count.jsonl\n", + "Generated JSONL file with - 500 max words, 75 samples - at ./dataset/gen-word-500-count.jsonl\n", + "Generated JSONL file with - 300 max words, 75 samples - at ./dataset/gen-word-300-count.jsonl\n", + "Generated a single JSONL file with 4085 samples (100 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonl\n", + "Generated a single JSONL file with 535 samples (75 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", + "Generated JSONL file with - 650 max words, 75 samples - at ./dataset/gen-word-650-count.jsonl\n", + "Generated JSONL file with - 670 max words, 75 samples - at ./dataset/gen-word-670-count.jsonl\n", + "Generated JSONL file with - 140 max words, 75 samples - at ./dataset/gen-word-140-count.jsonl\n", + "Generated a single JSONL file with 2789 samples (100 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n", + "Generated JSONL file with - 610 max words, 75 samples - at ./dataset/gen-word-610-count.jsonl\n", + "Generated a single JSONL file with 728 samples (75 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", + "Generated a single JSONL file with 691 samples (75 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", + "Generated a single JSONL file with 3791 samples (100 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", + "Generated JSONL file with - 690 max words, 75 samples - at ./dataset/gen-word-690-count.jsonl\n", + "Generated a single JSONL file with 5899 samples (100 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", + "Generated a single JSONL file with 5249 samples (100 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", + "Generated a single JSONL file with 3299 samples (100 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", + "Generated JSONL file with - 410 max words, 75 samples - at ./dataset/gen-word-410-count.jsonl\n", + "Generated JSONL file with - 630 max words, 75 samples - at ./dataset/gen-word-630-count.jsonl\n", + "Generated JSONL file with - 880 max words, 75 samples - at ./dataset/gen-word-880-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", + "Generated JSONL file with - 420 max words, 75 samples - at ./dataset/gen-word-420-count.jsonl\n", + "Generated JSONL file with - 830 max words, 75 samples - at ./dataset/gen-word-830-count.jsonl\n", + "Generated JSONL file with - 700 max words, 75 samples - at ./dataset/gen-word-700-count.jsonl\n", + "Generated a single JSONL file with 707 samples (75 token repeat) - 250 max words - at 
./dataset/shuffle-word-250-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", + "Generated a single JSONL file with 7559 samples (100 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", + "Generated JSONL file with - 890 max words, 75 samples - at ./dataset/gen-word-890-count.jsonl\n", + "Generated a single JSONL file with 372 samples (75 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", + "Generated a single JSONL file with 4390 samples (100 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", + "Generated a single JSONL file with 1494 samples (75 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 390 max words - at ./dataset/shuffle-word-390-count.jsonl\n", + "Generated a single JSONL file with 4825 samples (100 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", + "Generated a single JSONL file with 443 samples (75 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", + "Generated JSONL file with - 860 max words, 75 samples - at ./dataset/gen-word-860-count.jsonl\n", + "Generated JSONL file with - 790 max words, 75 samples - at ./dataset/gen-word-790-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", + "Generated a single JSONL file with 6532 samples (100 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", + "Generated JSONL file with - 400 max words, 75 samples - at ./dataset/gen-word-400-count.jsonl\n", + "Generated a single JSONL file with 434 samples (75 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", + "Generated JSONL file with - 1190 max words, 75 samples - at ./dataset/gen-word-1190-count.jsonl\n", + "Generated a single JSONL file with 8753 samples (100 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", + "Generated a single JSONL file with 793 samples (75 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", + "Generated a single JSONL file with 749 samples (75 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", + "Generated a single JSONL file with 1006 samples (75 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", + "Generated a single JSONL file with 1217 samples (75 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 400 max words - at 
./dataset/shuffle-word-400-count.jsonl\n", + "Generated JSONL file with - 850 max words, 75 samples - at ./dataset/gen-word-850-count.jsonl\n", + "Generated JSONL file with - 1350 max words, 75 samples - at ./dataset/gen-word-1350-count.jsonl\n", + "Generated JSONL file with - 810 max words, 75 samples - at ./dataset/gen-word-810-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", + "Generated JSONL file with - 970 max words, 75 samples - at ./dataset/gen-word-970-count.jsonl\n", + "Generated a single JSONL file with 3132 samples (100 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", + "Generated JSONL file with - 1060 max words, 75 samples - at ./dataset/gen-word-1060-count.jsonl\n", + "Generated JSONL file with - 680 max words, 75 samples - at ./dataset/gen-word-680-count.jsonl\n", + "Generated a single JSONL file with 304 samples (75 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", + "Generated a single JSONL file with 442 samples (75 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", + "Generated JSONL file with - 800 max words, 75 samples - at ./dataset/gen-word-800-count.jsonl\n", + "Generated a single JSONL file with 196 samples (75 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", + "Generated a single JSONL file with 686 samples (75 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", + "Generated a single JSONL file with 13087 samples (100 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", + "Generated JSONL file with - 740 max words, 75 samples - at ./dataset/gen-word-740-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", + "Generated a single JSONL file with 531 samples (75 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", + "Generated JSONL file with - 910 max words, 75 samples - at ./dataset/gen-word-910-count.jsonl\n", + "Generated JSONL file with - 1750 max words, 75 samples - at ./dataset/gen-word-1750-count.jsonl\n", + "Generated JSONL file with - 1030 max words, 75 samples - at ./dataset/gen-word-1030-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", + "Generated JSONL file with - 760 max words, 75 samples - at ./dataset/gen-word-760-count.jsonl\n", + "Generated JSONL file with - 710 max words, 75 samples - at ./dataset/gen-word-710-count.jsonl\n", + "Generated JSONL file with - 1280 max words, 75 samples - at ./dataset/gen-word-1280-count.jsonl\n", + "Generated a single JSONL file with 10610 samples (100 token repeat) - 25 max words - at 
./dataset/shuffle-word-25-count.jsonl\n", + "Generated a single JSONL file with 236 samples (75 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", + "Generated JSONL file with - 770 max words, 75 samples - at ./dataset/gen-word-770-count.jsonl\n", + "Generated JSONL file with - 900 max words, 75 samples - at ./dataset/gen-word-900-count.jsonl\n", + "Generated JSONL file with - 1700 max words, 75 samples - at ./dataset/gen-word-1700-count.jsonl\n", + "Generated JSONL file with - 1110 max words, 75 samples - at ./dataset/gen-word-1110-count.jsonl\n", + "Generated JSONL file with - 1020 max words, 75 samples - at ./dataset/gen-word-1020-count.jsonl\n", + "Generated JSONL file with - 990 max words, 75 samples - at ./dataset/gen-word-990-count.jsonl\n", + "Generated a single JSONL file with 237 samples (75 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", + "Generated a single JSONL file with 237 samples (75 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", + "Generated JSONL file with - 870 max words, 75 samples - at ./dataset/gen-word-870-count.jsonl\n", + "Generated JSONL file with - 1050 max words, 75 samples - at ./dataset/gen-word-1050-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", + "Generated JSONL file with - 1100 max words, 75 samples - at ./dataset/gen-word-1100-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", + "Generated JSONL file with - 1070 max words, 75 samples - at ./dataset/gen-word-1070-count.jsonl\n", + "Generated JSONL file with - 1120 max words, 75 samples - at ./dataset/gen-word-1120-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", + "Generated a single JSONL file with 230 samples (75 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", + "Generated JSONL file with - 1210 max words, 75 samples - at ./dataset/gen-word-1210-count.jsonl\n", + "Generated JSONL file with - 1150 max words, 75 samples - at ./dataset/gen-word-1150-count.jsonl\n", + "Generated JSONL file with - 1370 max words, 75 samples - at ./dataset/gen-word-1370-count.jsonl\n", + "Generated JSONL file with - 720 max words, 75 samples - at ./dataset/gen-word-720-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", + "Generated JSONL file with - 750 max words, 75 samples - at ./dataset/gen-word-750-count.jsonl\n", + "Generated JSONL file with - 2970 max words, 75 samples - at ./dataset/gen-word-2970-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", + "Generated a single JSONL file with 297 samples (75 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", + "Generated JSONL file with - 1040 max words, 75 samples - at ./dataset/gen-word-1040-count.jsonl\n", + "Generated a single JSONL file with 17810 samples (100 token repeat) - 15 max 
words - at ./dataset/shuffle-word-15-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", + "Generated a single JSONL file with 184 samples (75 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", + "Generated a single JSONL file with 26091 samples (100 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n", + "Generated JSONL file with - 940 max words, 75 samples - at ./dataset/gen-word-940-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 800 max words - at ./dataset/shuffle-word-800-count.jsonl\n", + "Generated a single JSONL file with 243 samples (75 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", + "Generated JSONL file with - 1010 max words, 75 samples - at ./dataset/gen-word-1010-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonl\n", + "Generated JSONL file with - 1160 max words, 75 samples - at ./dataset/gen-word-1160-count.jsonl\n", + "Generated JSONL file with - 1000 max words, 75 samples - at ./dataset/gen-word-1000-count.jsonl\n", + "Generated JSONL file with - 840 max words, 75 samples - at ./dataset/gen-word-840-count.jsonl\n", + "Generated JSONL file with - 930 max words, 75 samples - at ./dataset/gen-word-930-count.jsonl\n", + "Generated JSONL file with - 980 max words, 75 samples - at ./dataset/gen-word-980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1550 max words - at ./dataset/shuffle-word-1550-count.jsonl\n", + "Generated JSONL file with - 730 max words, 75 samples - at ./dataset/gen-word-730-count.jsonl\n", + "Generated JSONL file with - 920 max words, 75 samples - at ./dataset/gen-word-920-count.jsonl\n", + "Generated JSONL file with - 1140 max words, 75 samples - at ./dataset/gen-word-1140-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", + "Generated JSONL file with - 1170 max words, 75 samples - at ./dataset/gen-word-1170-count.jsonl\n", + "Generated JSONL file with - 1220 max words, 75 samples - at ./dataset/gen-word-1220-count.jsonl\n", + "Generated a single JSONL file with 223 samples (75 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4650 max words - at ./dataset/shuffle-word-4650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", + 
"Generated a single JSONL file with 225 samples (75 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 600 max words - at ./dataset/shuffle-word-600-count.jsonl\n", + "Generated JSONL file with - 1130 max words, 75 samples - at ./dataset/gen-word-1130-count.jsonl\n", + "Generated a single JSONL file with 231 samples (75 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", + "Generated JSONL file with - 1090 max words, 75 samples - at ./dataset/gen-word-1090-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", + "Generated JSONL file with - 1180 max words, 75 samples - at ./dataset/gen-word-1180-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", + "Generated JSONL file with - 950 max words, 75 samples - at ./dataset/gen-word-950-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", + "Generated a single JSONL file with 234 samples (75 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", + "Generated a single JSONL file with 240 samples (75 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", + "Generated JSONL file with - 1200 max words, 75 samples - at ./dataset/gen-word-1200-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5675 max words - at ./dataset/shuffle-word-5675-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", + "Generated JSONL file with - 4675 max words, 100 samples - at ./dataset/gen-word-4675-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", + "Generated JSONL file with - 1230 max words, 75 samples - at ./dataset/gen-word-1230-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", + "Generated a single JSONL file with 223 samples (75 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1190 max words - at ./dataset/shuffle-word-1190-count.jsonl\n", + "Generated JSONL file with - 1250 max words, 75 samples - at ./dataset/gen-word-1250-count.jsonl\n", + "Generated JSONL file with - 1080 max words, 75 samples - at ./dataset/gen-word-1080-count.jsonl\n", + "Generated JSONL file with - 1490 max words, 75 samples - at ./dataset/gen-word-1490-count.jsonl\n", 
+ "Generated a single JSONL file with 184 samples (75 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", + "Generated a single JSONL file with 188 samples (75 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", + "Generated a single JSONL file with 184 samples (75 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", + "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", + "Generated JSONL file with - 4850 max words, 100 samples - at ./dataset/gen-word-4850-count.jsonlGenerated JSONL file with - 1240 max words, 75 samples - at ./dataset/gen-word-1240-count.jsonl\n", "\n", - "Generated JSONL file with - 2585 max words, 100 samples - at ./dataset/gen-word-2585-count.jsonl\n", - "Generated JSONL file with - 1545 max words, 100 samples - at ./dataset/gen-word-1545-count.jsonl\n", - "Generated a single JSONL file with 140 samples (100 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n", - "Generated JSONL file with - 1625 max words, 100 samples - at ./dataset/gen-word-1625-count.jsonlGenerated a single JSONL file with 200 samples (100 token repeat) - 2090 max words - at ./dataset/shuffle-word-2090-count.jsonl\n", + "Generated JSONL file with - 4050 max words, 100 samples - at ./dataset/gen-word-4050-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", + "Generated JSONL file with - 1560 max words, 75 samples - at ./dataset/gen-word-1560-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", + "Generated JSONL file with - 2100 max words, 75 samples - at ./dataset/gen-word-2100-count.jsonl\n", + "Generated JSONL file with - 1270 max words, 75 samples - at ./dataset/gen-word-1270-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", + "Generated JSONL file with - 1950 max words, 75 samples - at ./dataset/gen-word-1950-count.jsonl\n", + "Generated JSONL file with - 1260 max words, 75 samples - at ./dataset/gen-word-1260-count.jsonl\n", + "Generated JSONL file with - 1290 max words, 75 samples - at ./dataset/gen-word-1290-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", + "Generated a single JSONL file with 191 samples (75 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", + "Generated JSONL file with - 1320 max words, 75 samples - at 
./dataset/gen-word-1320-count.jsonlGenerated JSONL file with - 2650 max words, 75 samples - at ./dataset/gen-word-2650-count.jsonl\n", "\n", - "Generated a single JSONL file with 184 samples (100 token repeat) - 2455 max words - at ./dataset/shuffle-word-2455-count.jsonl\n", - "Generated JSONL file with - 2580 max words, 100 samples - at ./dataset/gen-word-2580-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n", - "Generated JSONL file with - 2040 max words, 100 samples - at ./dataset/gen-word-2040-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n", - "Generated JSONL file with - 2540 max words, 100 samples - at ./dataset/gen-word-2540-count.jsonl\n", - "Generated JSONL file with - 2210 max words, 100 samples - at ./dataset/gen-word-2210-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1665 max words - at ./dataset/shuffle-word-1665-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1655 max words - at ./dataset/shuffle-word-1655-count.jsonl\n", - "Generated a single JSONL file with 104 samples (100 token repeat) - 2735 max words - at ./dataset/shuffle-word-2735-count.jsonl\n", - "Generated JSONL file with - 1920 max words, 100 samples - at ./dataset/gen-word-1920-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1675 max words - at ./dataset/shuffle-word-1675-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1815 max words - at ./dataset/shuffle-word-1815-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1845 max words - at ./dataset/shuffle-word-1845-count.jsonl\n", - "Generated JSONL file with - 2395 max words, 100 samples - at ./dataset/gen-word-2395-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1855 max words - at ./dataset/shuffle-word-1855-count.jsonl\n", - "Generated JSONL file with - 2410 max words, 100 samples - at ./dataset/gen-word-2410-count.jsonl\n", - "Generated JSONL file with - 1860 max words, 100 samples - at ./dataset/gen-word-1860-count.jsonl\n", - "Generated JSONL file with - 1995 max words, 100 samples - at ./dataset/gen-word-1995-count.jsonl\n", - "Generated JSONL file with - 1955 max words, 100 samples - at ./dataset/gen-word-1955-count.jsonl\n", - "Generated JSONL file with - 2665 max words, 100 samples - at ./dataset/gen-word-2665-count.jsonl\n", - "Generated JSONL file with - 2130 max words, 100 samples - at ./dataset/gen-word-2130-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2140 max words - at ./dataset/shuffle-word-2140-count.jsonl\n", - 
"Generated a single JSONL file with 200 samples (100 token repeat) - 1575 max words - at ./dataset/shuffle-word-1575-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1975 max words - at ./dataset/shuffle-word-1975-count.jsonl\n", - "Generated JSONL file with - 2610 max words, 100 samples - at ./dataset/gen-word-2610-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1875 max words - at ./dataset/shuffle-word-1875-count.jsonl\n", - "Generated JSONL file with - 2165 max words, 100 samples - at ./dataset/gen-word-2165-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1735 max words - at ./dataset/shuffle-word-1735-count.jsonl\n", - "Generated JSONL file with - 1830 max words, 100 samples - at ./dataset/gen-word-1830-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1835 max words - at ./dataset/shuffle-word-1835-count.jsonl\n", - "Generated JSONL file with - 2080 max words, 100 samples - at ./dataset/gen-word-2080-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n", - "Generated a single JSONL file with 116 samples (100 token repeat) - 2625 max words - at ./dataset/shuffle-word-2625-count.jsonl\n", - "Generated JSONL file with - 1770 max words, 100 samples - at ./dataset/gen-word-1770-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1515 max words - at ./dataset/shuffle-word-1515-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1615 max words - at ./dataset/shuffle-word-1615-count.jsonl\n", - "Generated JSONL file with - 2320 max words, 100 samples - at ./dataset/gen-word-2320-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1785 max words - at ./dataset/shuffle-word-1785-count.jsonl\n", - "Generated JSONL file with - 1990 max words, 100 samples - at ./dataset/gen-word-1990-count.jsonl\n", - "Generated a single JSONL file with 152 samples (100 token repeat) - 2565 max words - at ./dataset/shuffle-word-2565-count.jsonl\n", - "Generated JSONL file with - 2510 max words, 100 samples - at ./dataset/gen-word-2510-count.jsonl\n", - "Generated JSONL file with - 1825 max words, 100 samples - at ./dataset/gen-word-1825-count.jsonl\n", - "Generated JSONL file with - 2435 max words, 100 samples - at ./dataset/gen-word-2435-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1940 max words - at ./dataset/shuffle-word-1940-count.jsonl\n", - "Generated JSONL file with - 2035 max words, 100 samples - at ./dataset/gen-word-2035-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n", - "Generated a single JSONL file with 200 samples (100 token repeat) - 1945 max words - at 
./dataset/shuffle-word-1945-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2245 max words - at ./dataset/shuffle-word-2245-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n",
- "Generated JSONL file with - 2015 max words, 100 samples - at ./dataset/gen-word-2015-count.jsonl\n",
- "Generated a single JSONL file with 199 samples (100 token repeat) - 2395 max words - at ./dataset/shuffle-word-2395-count.jsonl\n",
- "Generated JSONL file with - 2615 max words, 100 samples - at ./dataset/gen-word-2615-count.jsonl\n",
- "Generated JSONL file with - 2770 max words, 100 samples - at ./dataset/gen-word-2770-count.jsonl\n",
- "Generated a single JSONL file with 199 samples (100 token repeat) - 2375 max words - at ./dataset/shuffle-word-2375-count.jsonl\n",
- "Generated a single JSONL file with 144 samples (100 token repeat) - 2600 max words - at ./dataset/shuffle-word-2600-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n",
- "Generated JSONL file with - 2600 max words, 100 samples - at ./dataset/gen-word-2600-count.jsonl\n",
- "Generated JSONL file with - 2560 max words, 100 samples - at ./dataset/gen-word-2560-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n",
- "Generated JSONL file with - 1970 max words, 100 samples - at ./dataset/gen-word-1970-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2005 max words - at ./dataset/shuffle-word-2005-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1765 max words - at ./dataset/shuffle-word-1765-count.jsonl\n",
- "Generated JSONL file with - 2125 max words, 100 samples - at ./dataset/gen-word-2125-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1555 max words - at ./dataset/shuffle-word-1555-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2105 max words - at ./dataset/shuffle-word-2105-count.jsonl\n",
- "Generated JSONL file with - 2000 max words, 100 samples - at ./dataset/gen-word-2000-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n",
- "Generated a single JSONL file with 37746 samples (500 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n",
- "Generated JSONL file with - 1960 max words, 100 samples - at ./dataset/gen-word-1960-count.jsonl\n",
- "Generated JSONL file with - 2110 max words, 100 samples - at ./dataset/gen-word-2110-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n",
- "Generated JSONL file with - 2160 max words, 100 samples - at ./dataset/gen-word-2160-count.jsonl\n",
- "Generated JSONL file with - 2115 max words, 100 samples - at ./dataset/gen-word-2115-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n",
- "Generated JSONL file with - 2595 max words, 100 samples - at ./dataset/gen-word-2595-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1795 max words - at ./dataset/shuffle-word-1795-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2215 max words - at ./dataset/shuffle-word-2215-count.jsonl\n",
- "Generated JSONL file with - 2590 max words, 100 samples - at ./dataset/gen-word-2590-count.jsonl\n",
- "Generated JSONL file with - 2290 max words, 100 samples - at ./dataset/gen-word-2290-count.jsonl\n",
- "Generated JSONL file with - 2140 max words, 100 samples - at ./dataset/gen-word-2140-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2025 max words - at ./dataset/shuffle-word-2025-count.jsonl\n",
- "Generated JSONL file with - 2200 max words, 100 samples - at ./dataset/gen-word-2200-count.jsonl\n",
- "Generated JSONL file with - 2180 max words, 100 samples - at ./dataset/gen-word-2180-count.jsonl\n",
- "Generated JSONL file with - 2275 max words, 100 samples - at ./dataset/gen-word-2275-count.jsonl\n",
- "Generated JSONL file with - 2575 max words, 100 samples - at ./dataset/gen-word-2575-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2015 max words - at ./dataset/shuffle-word-2015-count.jsonl\n",
- "Generated a single JSONL file with 197 samples (100 token repeat) - 2305 max words - at ./dataset/shuffle-word-2305-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2000 max words - at ./dataset/shuffle-word-2000-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2165 max words - at ./dataset/shuffle-word-2165-count.jsonl\n",
- "Generated a single JSONL file with 175 samples (100 token repeat) - 2435 max words - at ./dataset/shuffle-word-2435-count.jsonl\n",
- "Generated JSONL file with - 1890 max words, 100 samples - at ./dataset/gen-word-1890-count.jsonl\n",
- "Generated a single JSONL file with 197 samples (100 token repeat) - 2330 max words - at ./dataset/shuffle-word-2330-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n",
- "Generated a single JSONL file with 150 samples (100 token repeat) - 2595 max words - at ./dataset/shuffle-word-2595-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n",
- "Generated a single JSONL file with 198 samples (100 token repeat) - 2315 max words - at ./dataset/shuffle-word-2315-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n",
- "Generated a single JSONL file with 183 samples (100 token repeat) - 2450 max words - at ./dataset/shuffle-word-2450-count.jsonl\n",
- "Generated JSONL file with - 2070 max words, 100 samples - at ./dataset/gen-word-2070-count.jsonl\n",
- "Generated JSONL file with - 2300 max words, 100 samples - at ./dataset/gen-word-2300-count.jsonl\n",
- "Generated a single JSONL file with 194 samples (100 token repeat) - 2325 max words - at ./dataset/shuffle-word-2325-count.jsonl\n",
- "Generated JSONL file with - 1985 max words, 100 samples - at ./dataset/gen-word-1985-count.jsonl\n",
- "Generated JSONL file with - 2020 max words, 100 samples - at ./dataset/gen-word-2020-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1955 max words - at ./dataset/shuffle-word-1955-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2100 max words - at ./dataset/shuffle-word-2100-count.jsonl\n",
- "Generated JSONL file with - 2075 max words, 100 samples - at ./dataset/gen-word-2075-count.jsonl\n",
- "Generated JSONL file with - 2530 max words, 100 samples - at ./dataset/gen-word-2530-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2145 max words - at ./dataset/shuffle-word-2145-count.jsonl\n",
- "Generated JSONL file with - 2155 max words, 100 samples - at ./dataset/gen-word-2155-count.jsonl\n",
- "Generated a single JSONL file with 159 samples (100 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n",
- "Generated JSONL file with - 1980 max words, 100 samples - at ./dataset/gen-word-1980-count.jsonl\n",
- "Generated JSONL file with - 2425 max words, 100 samples - at ./dataset/gen-word-2425-count.jsonl\n",
- "Generated JSONL file with - 2220 max words, 100 samples - at ./dataset/gen-word-2220-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 1915 max words - at ./dataset/shuffle-word-1915-count.jsonl\n",
- "Generated a single JSONL file with 184 samples (100 token repeat) - 2415 max words - at ./dataset/shuffle-word-2415-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3045 max words - at ./dataset/shuffle-word-3045-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2055 max words - at ./dataset/shuffle-word-2055-count.jsonl\n",
- "Generated JSONL file with - 2465 max words, 100 samples - at ./dataset/gen-word-2465-count.jsonl\n",
- "Generated JSONL file with - 2620 max words, 100 samples - at ./dataset/gen-word-2620-count.jsonl\n",
- "Generated JSONL file with - 2420 max words, 100 samples - at ./dataset/gen-word-2420-count.jsonl\n",
- "Generated JSONL file with - 2355 max words, 100 samples - at ./dataset/gen-word-2355-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n",
- "Generated a single JSONL file with 199 samples (100 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2265 max words - at ./dataset/shuffle-word-2265-count.jsonl\n",
- "Generated a single JSONL file with 119 samples (100 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n",
- "Generated JSONL file with - 2360 max words, 100 samples - at ./dataset/gen-word-2360-count.jsonl\n",
- "Generated a single JSONL file with 156 samples (100 token repeat) - 2545 max words - at ./dataset/shuffle-word-2545-count.jsonl\n",
- "Generated JSONL file with - 2330 max words, 100 samples - at ./dataset/gen-word-2330-count.jsonl\n",
- "Generated a single JSONL file with 149 samples (100 token repeat) - 2505 max words - at ./dataset/shuffle-word-2505-count.jsonl\n",
- "Generated a single JSONL file with 150 samples (100 token repeat) - 2535 max words - at ./dataset/shuffle-word-2535-count.jsonl\n",
- "Generated JSONL file with - 2535 max words, 100 samples - at ./dataset/gen-word-2535-count.jsonl\n",
- "Generated JSONL file with - 2090 max words, 100 samples - at ./dataset/gen-word-2090-count.jsonl\n",
- "Generated JSONL file with - 2415 max words, 100 samples - at ./dataset/gen-word-2415-count.jsonl\n",
- "Generated JSONL file with - 2440 max words, 100 samples - at ./dataset/gen-word-2440-count.jsonl\n",
- "Generated JSONL file with - 2375 max words, 100 samples - at ./dataset/gen-word-2375-count.jsonl\n",
- "Generated JSONL file with - 2430 max words, 100 samples - at ./dataset/gen-word-2430-count.jsonl\n",
- "Generated JSONL file with - 2250 max words, 100 samples - at ./dataset/gen-word-2250-count.jsonl\n",
- "Generated JSONL file with - 2550 max words, 100 samples - at ./dataset/gen-word-2550-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2885 max words - at ./dataset/shuffle-word-2885-count.jsonl\n",
- "Generated a single JSONL file with 184 samples (100 token repeat) - 2445 max words - at ./dataset/shuffle-word-2445-count.jsonl\n",
- "Generated JSONL file with - 2085 max words, 100 samples - at ./dataset/gen-word-2085-count.jsonl\n",
- "Generated JSONL file with - 2310 max words, 100 samples - at ./dataset/gen-word-2310-count.jsonl\n",
- "Generated a single JSONL file with 121 samples (100 token repeat) - 2645 max words - at ./dataset/shuffle-word-2645-count.jsonl\n",
- "Generated JSONL file with - 2280 max words, 100 samples - at ./dataset/gen-word-2280-count.jsonl\n",
- "Generated a single JSONL file with 150 samples (100 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n",
- "Generated JSONL file with - 2235 max words, 100 samples - at ./dataset/gen-word-2235-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n",
- "Generated JSONL file with - 2270 max words, 100 samples - at ./dataset/gen-word-2270-count.jsonl\n",
- "Generated JSONL file with - 2190 max words, 100 samples - at ./dataset/gen-word-2190-count.jsonl\n",
- "Generated a single JSONL file with 191 samples (100 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n",
- "Generated JSONL file with - 2390 max words, 100 samples - at ./dataset/gen-word-2390-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2185 max words - at ./dataset/shuffle-word-2185-count.jsonl\n",
- "Generated JSONL file with - 2460 max words, 100 samples - at ./dataset/gen-word-2460-count.jsonl\n",
- "Generated a single JSONL file with 113 samples (100 token repeat) - 2605 max words - at ./dataset/shuffle-word-2605-count.jsonl\n",
- "Generated JSONL file with - 2570 max words, 100 samples - at ./dataset/gen-word-2570-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2865 max words - at ./dataset/shuffle-word-2865-count.jsonl\n",
- "Generated JSONL file with - 2655 max words, 100 samples - at ./dataset/gen-word-2655-count.jsonl\n",
- "Generated JSONL file with - 2930 max words, 100 samples - at ./dataset/gen-word-2930-count.jsonl\n",
- "Generated JSONL file with - 2735 max words, 100 samples - at ./dataset/gen-word-2735-count.jsonl\n",
- "Generated JSONL file with - 2450 max words, 100 samples - at ./dataset/gen-word-2450-count.jsonl\n",
- "Generated a single JSONL file with 179 samples (100 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n",
- "Generated JSONL file with - 2475 max words, 100 samples - at ./dataset/gen-word-2475-count.jsonl\n",
- "Generated a single JSONL file with 151 samples (100 token repeat) - 2585 max words - at ./dataset/shuffle-word-2585-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n",
- "Generated JSONL file with - 2890 max words, 100 samples - at ./dataset/gen-word-2890-count.jsonlGenerated a single JSONL file with 152 samples (100 token repeat) - 2555 max words - at ./dataset/shuffle-word-2555-count.jsonl\n",
+ "Generated a single JSONL file with 153 samples (75 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1930 max words - at ./dataset/shuffle-word-1930-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n",
+ "Generated a single JSONL file with 90 samples (75 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n",
+ "Generated a single JSONL file with 140 samples (75 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n",
+ "Generated JSONL file with - 2660 max words, 75 samples - at ./dataset/gen-word-2660-count.jsonl\n",
+ "Generated a single JSONL file with 114 samples (75 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n",
+ "Generated a single JSONL file with 151 samples (75 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n",
+ "Generated a single JSONL file with 188 samples (75 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n",
+ "Generated a single JSONL file with 186 samples (75 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n",
+ "Generated a single JSONL file with 154 samples (75 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n",
+ "Generated JSONL file with - 1330 max words, 75 samples - at ./dataset/gen-word-1330-count.jsonl\n",
+ "Generated a single JSONL file with 186 samples (75 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n",
+ "Generated a single JSONL file with 147 samples (75 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n",
+ "Generated a single JSONL file with 152 samples (75 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n",
+ "Generated JSONL file with - 1310 max words, 75 samples - at ./dataset/gen-word-1310-count.jsonl\n",
+ "Generated JSONL file with - 1340 max words, 75 samples - at ./dataset/gen-word-1340-count.jsonl\n",
+ "Generated JSONL file with - 1380 max words, 75 samples - at ./dataset/gen-word-1380-count.jsonl\n",
+ "Generated JSONL file with - 1440 max words, 75 samples - at ./dataset/gen-word-1440-count.jsonl\n",
+ "Generated JSONL file with - 1400 max words, 75 samples - at ./dataset/gen-word-1400-count.jsonl\n",
+ "Generated JSONL file with - 1360 max words, 75 samples - at ./dataset/gen-word-1360-count.jsonl\n",
+ "Generated JSONL file with - 1300 max words, 75 samples - at ./dataset/gen-word-1300-count.jsonl\n",
+ "Generated JSONL file with - 1390 max words, 75 samples - at ./dataset/gen-word-1390-count.jsonl\n",
+ "Generated JSONL file with - 1410 max words, 75 samples - at ./dataset/gen-word-1410-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonl\n",
+ "Generated JSONL file with - 1420 max words, 75 samples - at ./dataset/gen-word-1420-count.jsonl\n",
+ "Generated a single JSONL file with 152 samples (75 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n",
+ "Generated JSONL file with - 1430 max words, 75 samples - at ./dataset/gen-word-1430-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n",
+ "Generated a single JSONL file with 152 samples (75 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n",
+ "Generated JSONL file with - 1450 max words, 75 samples - at ./dataset/gen-word-1450-count.jsonl\n",
+ "Generated JSONL file with - 1480 max words, 75 samples - at ./dataset/gen-word-1480-count.jsonl\n",
+ "Generated a single JSONL file with 183 samples (75 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n",
+ "Generated a single JSONL file with 55738 samples (100 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n",
+ "Generated JSONL file with - 1520 max words, 75 samples - at ./dataset/gen-word-1520-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n",
+ "Generated JSONL file with - 5550 max words, 100 samples - at ./dataset/gen-word-5550-count.jsonl\n",
+ "Generated JSONL file with - 1460 max words, 75 samples - at ./dataset/gen-word-1460-count.jsonl\n",
+ "Generated JSONL file with - 1470 max words, 75 samples - at ./dataset/gen-word-1470-count.jsonl\n",
+ "Generated JSONL file with - 1500 max words, 75 samples - at ./dataset/gen-word-1500-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n",
+ "Generated a single JSONL file with 241 samples (75 token repeat) - 830 max words - at ./dataset/shuffle-word-830-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n",
+ "Generated a single JSONL file with 153 samples (75 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n",
+ "Generated JSONL file with - 1600 max words, 75 samples - at ./dataset/gen-word-1600-count.jsonl\n",
+ "Generated JSONL file with - 1530 max words, 75 samples - at ./dataset/gen-word-1530-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n",
+ "Generated JSONL file with - 1660 max words, 75 samples - at ./dataset/gen-word-1660-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n",
+ "Generated JSONL file with - 1540 max words, 75 samples - at ./dataset/gen-word-1540-count.jsonl\n",
+ "Generated JSONL file with - 1690 max words, 75 samples - at ./dataset/gen-word-1690-count.jsonl\n",
+ "Generated JSONL file with - 1510 max words, 75 samples - at ./dataset/gen-word-1510-count.jsonl\n",
+ "Generated JSONL file with - 1590 max words, 75 samples - at ./dataset/gen-word-1590-count.jsonl\n",
+ "Generated JSONL file with - 1550 max words, 75 samples - at ./dataset/gen-word-1550-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n",
+ "Generated JSONL file with - 1740 max words, 75 samples - at ./dataset/gen-word-1740-count.jsonl\n",
+ "Generated JSONL file with - 1580 max words, 75 samples - at ./dataset/gen-word-1580-count.jsonlGenerated JSONL file with - 1670 max words, 75 samples - at ./dataset/gen-word-1670-count.jsonl\n",
"\n",
- "Generated a single JSONL file with 198 samples (100 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n",
- "Generated JSONL file with - 2705 max words, 100 samples - at ./dataset/gen-word-2705-count.jsonl\n",
- "Generated a single JSONL file with 197 samples (100 token repeat) - 2355 max words - at ./dataset/shuffle-word-2355-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3060 max words - at ./dataset/shuffle-word-3060-count.jsonl\n",
- "Generated JSONL file with - 2445 max words, 100 samples - at ./dataset/gen-word-2445-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n",
- "Generated a single JSONL file with 102 samples (100 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n",
- "Generated JSONL file with - 2795 max words, 100 samples - at ./dataset/gen-word-2795-count.jsonl\n",
- "Generated a single JSONL file with 187 samples (100 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n",
- "Generated JSONL file with - 2385 max words, 100 samples - at ./dataset/gen-word-2385-count.jsonl\n",
- "Generated JSONL file with - 2650 max words, 100 samples - at ./dataset/gen-word-2650-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n",
- "Generated a single JSONL file with 118 samples (100 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n",
- "Generated JSONL file with - 2470 max words, 100 samples - at ./dataset/gen-word-2470-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2285 max words - at ./dataset/shuffle-word-2285-count.jsonl\n",
- "Generated JSONL file with - 2205 max words, 100 samples - at ./dataset/gen-word-2205-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n",
- "Generated JSONL file with - 2325 max words, 100 samples - at ./dataset/gen-word-2325-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2955 max words - at ./dataset/shuffle-word-2955-count.jsonl\n",
- "Generated JSONL file with - 2625 max words, 100 samples - at ./dataset/gen-word-2625-count.jsonl\n",
- "Generated JSONL file with - 2405 max words, 100 samples - at ./dataset/gen-word-2405-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3520 max words - at ./dataset/shuffle-word-3520-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n",
- "Generated JSONL file with - 2240 max words, 100 samples - at ./dataset/gen-word-2240-count.jsonl\n",
- "Generated a single JSONL file with 26337 samples (500 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2875 max words - at ./dataset/shuffle-word-2875-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3540 max words - at ./dataset/shuffle-word-3540-count.jsonl\n",
- "Generated JSONL file with - 2365 max words, 100 samples - at ./dataset/gen-word-2365-count.jsonl\n",
- "Generated a single JSONL file with 101 samples (100 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n",
- "Generated a single JSONL file with 184 samples (100 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n",
- "Generated JSONL file with - 2565 max words, 100 samples - at ./dataset/gen-word-2565-count.jsonl\n",
- "Generated a single JSONL file with 148 samples (100 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n",
- "Generated a single JSONL file with 117 samples (100 token repeat) - 2615 max words - at ./dataset/shuffle-word-2615-count.jsonl\n",
- "Generated JSONL file with - 2605 max words, 100 samples - at ./dataset/gen-word-2605-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3510 max words - at ./dataset/shuffle-word-3510-count.jsonl\n",
- "Generated a single JSONL file with 183 samples (100 token repeat) - 2405 max words - at ./dataset/shuffle-word-2405-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3345 max words - at ./dataset/shuffle-word-3345-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2890 max words - at ./dataset/shuffle-word-2890-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3090 max words - at ./dataset/shuffle-word-3090-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2965 max words - at ./dataset/shuffle-word-2965-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3530 max words - at ./dataset/shuffle-word-3530-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3080 max words - at ./dataset/shuffle-word-3080-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3505 max words - at ./dataset/shuffle-word-3505-count.jsonl\n",
- "Generated JSONL file with - 3085 max words, 100 samples - at ./dataset/gen-word-3085-count.jsonl\n",
- "Generated JSONL file with - 2920 max words, 100 samples - at ./dataset/gen-word-2920-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3120 max words - at ./dataset/shuffle-word-3120-count.jsonl\n",
- "Generated JSONL file with - 2345 max words, 100 samples - at ./dataset/gen-word-2345-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3495 max words - at ./dataset/shuffle-word-3495-count.jsonl\n",
- "Generated a single JSONL file with 200 samples (100 token repeat) - 2255 max words - at ./dataset/shuffle-word-2255-count.jsonl\n",
- "Generated JSONL file with - 3545 max words, 100 samples - at ./dataset/gen-word-3545-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3170 max words - at ./dataset/shuffle-word-3170-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3515 max words - at ./dataset/shuffle-word-3515-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at ./dataset/shuffle-word-3600-count.jsonl\n",
- "Generated JSONL file with - 3470 max words, 100 samples - at ./dataset/gen-word-3470-count.jsonl\n",
- "Generated a single JSONL file with 187 samples (100 token repeat) - 2495 max words - at ./dataset/shuffle-word-2495-count.jsonl\n",
- "Generated a single JSONL file with 161 samples (100 token repeat) - 2525 max words - at ./dataset/shuffle-word-2525-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3635 max words - at ./dataset/shuffle-word-3635-count.jsonl\n",
- "Generated a single JSONL file with 13274 samples (500 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n",
+ "Generated JSONL file with - 1620 max words, 75 samples - at ./dataset/gen-word-1620-count.jsonl\n",
+ "Generated JSONL file with - 1760 max words, 75 samples - at ./dataset/gen-word-1760-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n",
+ "Generated JSONL file with - 1640 max words, 75 samples - at ./dataset/gen-word-1640-count.jsonl\n",
+ "Generated JSONL file with - 1790 max words, 75 samples - at ./dataset/gen-word-1790-count.jsonl\n",
+ "Generated JSONL file with - 1630 max words, 75 samples - at ./dataset/gen-word-1630-count.jsonl\n",
+ "Generated JSONL file with - 1770 max words, 75 samples - at ./dataset/gen-word-1770-count.jsonl\n",
+ "Generated JSONL file with - 1840 max words, 75 samples - at ./dataset/gen-word-1840-count.jsonl\n",
+ "Generated JSONL file with - 1610 max words, 75 samples - at ./dataset/gen-word-1610-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n",
+ "Generated JSONL file with - 1800 max words, 75 samples - at ./dataset/gen-word-1800-count.jsonl\n",
+ "Generated JSONL file with - 1680 max words, 75 samples - at ./dataset/gen-word-1680-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n",
+ "Generated JSONL file with - 1720 max words, 75 samples - at ./dataset/gen-word-1720-count.jsonl\n",
+ "Generated JSONL file with - 1570 max words, 75 samples - at ./dataset/gen-word-1570-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n",
+ "Generated JSONL file with - 1850 max words, 75 samples - at ./dataset/gen-word-1850-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n",
+ "Generated JSONL file with - 1780 max words, 75 samples - at ./dataset/gen-word-1780-count.jsonl\n",
+ "Generated JSONL file with - 1820 max words, 75 samples - at ./dataset/gen-word-1820-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n",
+ "Generated JSONL file with - 1730 max words, 75 samples - at ./dataset/gen-word-1730-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n",
+ "Generated JSONL file with - 1810 max words, 75 samples - at ./dataset/gen-word-1810-count.jsonl\n",
+ "Generated JSONL file with - 1710 max words, 75 samples - at ./dataset/gen-word-1710-count.jsonl\n",
+ "Generated JSONL file with - 1870 max words, 75 samples - at ./dataset/gen-word-1870-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n",
+ "Generated JSONL file with - 1880 max words, 75 samples - at ./dataset/gen-word-1880-count.jsonl\n",
+ "Generated JSONL file with - 1860 max words, 75 samples - at ./dataset/gen-word-1860-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n",
+ "Generated JSONL file with - 1830 max words, 75 samples - at ./dataset/gen-word-1830-count.jsonl\n",
+ "Generated JSONL file with - 2310 max words, 75 samples - at ./dataset/gen-word-2310-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonl\n",
+ "Generated JSONL file with - 1910 max words, 75 samples - at ./dataset/gen-word-1910-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1650 max words - at ./dataset/shuffle-word-1650-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n",
+ "Generated JSONL file with - 1650 max words, 75 samples - at ./dataset/gen-word-1650-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n",
+ "Generated JSONL file with - 1960 max words, 75 samples - at ./dataset/gen-word-1960-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1940 max words - at ./dataset/shuffle-word-1940-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n",
+ "Generated JSONL file with - 1890 max words, 75 samples - at ./dataset/gen-word-1890-count.jsonl\n",
+ "Generated JSONL file with - 2050 max words, 75 samples - at ./dataset/gen-word-2050-count.jsonl\n",
+ "Generated JSONL file with - 2440 max words, 75 samples - at ./dataset/gen-word-2440-count.jsonl\n",
+ "Generated JSONL file with - 1900 max words, 75 samples - at ./dataset/gen-word-1900-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2100 max words - at ./dataset/shuffle-word-2100-count.jsonl\n",
+ "Generated JSONL file with - 2480 max words, 75 samples - at ./dataset/gen-word-2480-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n",
+ "Generated JSONL file with - 1930 max words, 75 samples - at ./dataset/gen-word-1930-count.jsonl\n",
+ "Generated JSONL file with - 1940 max words, 75 samples - at ./dataset/gen-word-1940-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n",
+ "Generated JSONL file with - 2330 max words, 75 samples - at ./dataset/gen-word-2330-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n",
+ "Generated JSONL file with - 2170 max words, 75 samples - at ./dataset/gen-word-2170-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1910 max words - at ./dataset/shuffle-word-1910-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n",
+ "Generated JSONL file with - 2500 max words, 75 samples - at ./dataset/gen-word-2500-count.jsonl\n",
+ "Generated a single JSONL file with 90 samples (75 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n",
+ "Generated a single JSONL file with 86 samples (75 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n",
+ "Generated a single JSONL file with 139 samples (75 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n",
+ "Generated JSONL file with - 2580 max words, 75 samples - at ./dataset/gen-word-2580-count.jsonl\n",
+ "Generated JSONL file with - 1970 max words, 75 samples - at ./dataset/gen-word-1970-count.jsonl\n",
+ "Generated JSONL file with - 2000 max words, 75 samples - at ./dataset/gen-word-2000-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words - at ./dataset/shuffle-word-3450-count.jsonl\n",
+ "Generated JSONL file with - 1920 max words, 75 samples - at ./dataset/gen-word-1920-count.jsonl\n",
+ "Generated JSONL file with - 1980 max words, 75 samples - at ./dataset/gen-word-1980-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5425 max words - at ./dataset/shuffle-word-5425-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonl\n",
+ "Generated JSONL file with - 1990 max words, 75 samples - at ./dataset/gen-word-1990-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1870 max words - at ./dataset/shuffle-word-1870-count.jsonl\n",
+ "Generated JSONL file with - 2820 max words, 75 samples - at ./dataset/gen-word-2820-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n",
"Generated JSONL file with - 3475 max words, 100 samples - at ./dataset/gen-word-3475-count.jsonl\n",
- "Generated JSONL file with - 3560 max words, 100 samples - at ./dataset/gen-word-3560-count.jsonl\n",
- "Generated JSONL file with - 3490 max words, 100 samples - at ./dataset/gen-word-3490-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3490 max words - at ./dataset/shuffle-word-3490-count.jsonlGenerated a single JSONL file with 100 samples (100 token repeat) - 3670 max words - at ./dataset/shuffle-word-3670-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3240 max words - at ./dataset/shuffle-word-3240-count.jsonl\n",
- "Generated a single JSONL file with 118 samples (100 token repeat) - 2635 max words - at ./dataset/shuffle-word-2635-count.jsonl\n",
- "Generated a single JSONL file with 13999 samples (500 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n",
- "Generated JSONL file with - 3535 max words, 100 samples - at ./dataset/gen-word-3535-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3105 max words - at ./dataset/shuffle-word-3105-count.jsonl\n",
- "Generated JSONL file with - 3495 max words, 100 samples - at ./dataset/gen-word-3495-count.jsonl\n",
- "Generated a single JSONL file with 101 samples (100 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3590 max words - at ./dataset/shuffle-word-3590-count.jsonl\n",
"\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3355 max words - at ./dataset/shuffle-word-3355-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3735 max words - at ./dataset/shuffle-word-3735-count.jsonl\n",
- "Generated JSONL file with - 3680 max words, 100 samples - at ./dataset/gen-word-3680-count.jsonl\n",
- "Generated JSONL file with - 2335 max words, 100 samples - at ./dataset/gen-word-2335-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3790 max words - at ./dataset/shuffle-word-3790-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3145 max words - at ./dataset/shuffle-word-3145-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3110 max words - at ./dataset/shuffle-word-3110-count.jsonl\n",
- "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n",
- "Generated JSONL file with - 3655 max words, 100 samples - at ./dataset/gen-word-3655-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n",
- "Generated a single JSONL file with 116 samples (100 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3615 max words - at ./dataset/shuffle-word-3615-count.jsonl\n",
- "Generated a single JSONL file with 32726 samples (500 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n",
- "Generated JSONL file with - 2975 max words, 100 samples - at ./dataset/gen-word-2975-count.jsonl\n",
- "Generated JSONL file with - 3510 max words, 100 samples - at ./dataset/gen-word-3510-count.jsonl\n",
- "Generated JSONL file with - 3540 max words, 100 samples - at ./dataset/gen-word-3540-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3655 max words - at ./dataset/shuffle-word-3655-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3730 max words - at ./dataset/shuffle-word-3730-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3570 max words - at ./dataset/shuffle-word-3570-count.jsonlGenerated JSONL file with - 3705 max words, 100 samples - at ./dataset/gen-word-3705-count.jsonl\n",
"\n",
- "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n",
- "Generated JSONL file with - 3715 max words, 100 samples - at ./dataset/gen-word-3715-count.jsonl\n",
- "Generated JSONL file with - 3780 max words, 100 samples - at ./dataset/gen-word-3780-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3545 max words - at ./dataset/shuffle-word-3545-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3555 max words - at ./dataset/shuffle-word-3555-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3705 max words - at ./dataset/shuffle-word-3705-count.jsonl\n",
- "Generated JSONL file with - 3745 max words, 100 samples - at ./dataset/gen-word-3745-count.jsonlGenerated JSONL file with - 3610 max words, 100 samples - at ./dataset/gen-word-3610-count.jsonl\n",
"\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3755 max words - at ./dataset/shuffle-word-3755-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3180 max words - at ./dataset/shuffle-word-3180-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3760 max words - at ./dataset/shuffle-word-3760-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max words - at ./dataset/shuffle-word-3475-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3480 max words - at ./dataset/shuffle-word-3480-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n",
- "Generated JSONL file with - 3735 max words, 100 samples - at ./dataset/gen-word-3735-count.jsonl\n",
- "Generated JSONL file with - 3095 max words, 100 samples - at ./dataset/gen-word-3095-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3005 max words - at ./dataset/shuffle-word-3005-count.jsonl\n",
- "Generated JSONL file with - 3765 max words, 100 samples - at ./dataset/gen-word-3765-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3155 max words - at ./dataset/shuffle-word-3155-count.jsonl\n",
- "Generated JSONL file with - 3015 max words, 100 samples - at ./dataset/gen-word-3015-count.jsonl\n",
- "Generated JSONL file with - 3440 max words, 100 samples - at ./dataset/gen-word-3440-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3780 max words - at ./dataset/shuffle-word-3780-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n",
+ "Generated JSONL file with - 2010 max words, 75 samples - at ./dataset/gen-word-2010-count.jsonl\n",
+ "Generated JSONL file with - 4950 max words, 100 samples - at ./dataset/gen-word-4950-count.jsonl\n",
+ "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n",
+ "Generated JSONL file with - 2030 max words, 75 samples - at ./dataset/gen-word-2030-count.jsonl\n",
+ "Generated a single JSONL file with 140 samples (75 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n",
+ "Generated JSONL file with - 2550 max words, 75 samples - at ./dataset/gen-word-2550-count.jsonl\n",
+ "Generated JSONL file with - 2020 max words, 75 samples - at ./dataset/gen-word-2020-count.jsonl\n",
+ "Generated JSONL file with - 2040 max words, 75 samples - at ./dataset/gen-word-2040-count.jsonl\n",
+ "Generated JSONL file with - 2590 max words, 75 samples - at ./dataset/gen-word-2590-count.jsonl\n",
+ "Generated JSONL file with - 2060 max words, 75 samples - at ./dataset/gen-word-2060-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n",
+ "Generated JSONL file with - 5825 max words, 100 samples - at ./dataset/gen-word-5825-count.jsonl\n",
+ "Generated JSONL file with - 2110 max words, 75 samples - at ./dataset/gen-word-2110-count.jsonl\n",
+ "Generated JSONL file with - 2140 max words, 75 samples - at ./dataset/gen-word-2140-count.jsonl\n",
+ "Generated JSONL file with - 2080 max words, 75 samples - at ./dataset/gen-word-2080-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2090 max words - at ./dataset/shuffle-word-2090-count.jsonl\n",
+ "Generated JSONL file with - 2570 max words, 75 samples - at ./dataset/gen-word-2570-count.jsonl\n",
+ "Generated JSONL file with - 4250 max words, 100 samples - at ./dataset/gen-word-4250-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2140 max words - at ./dataset/shuffle-word-2140-count.jsonl\n",
+ "Generated a single JSONL file with 79 samples (75 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonl\n",
+ "Generated JSONL file with - 2220 max words, 75 samples - at ./dataset/gen-word-2220-count.jsonl\n",
+ "Generated JSONL file with - 2120 max words, 75 samples - at ./dataset/gen-word-2120-count.jsonl\n",
+ "Generated JSONL file with - 2190 max words, 75 samples - at ./dataset/gen-word-2190-count.jsonl\n",
+ "Generated JSONL file with - 2180 max words, 75 samples - at ./dataset/gen-word-2180-count.jsonl\n",
"Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n",
- "Generated JSONL file with - 3045 max words, 100 samples - at ./dataset/gen-word-3045-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3630 max words - at ./dataset/shuffle-word-3630-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3305 max words - at ./dataset/shuffle-word-3305-count.jsonl\n",
- "Generated JSONL file with - 3775 max words, 100 samples - at ./dataset/gen-word-3775-count.jsonl\n",
- "Generated JSONL file with - 3710 max words, 100 samples - at ./dataset/gen-word-3710-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3535 max words - at ./dataset/shuffle-word-3535-count.jsonl\n",
- "Generated a single JSONL file with 100 samples (100 token repeat) - 3740 max words - at ./dataset/shuffle-word-3740-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n",
+ "Generated JSONL file with - 2230 max words, 75 samples - at ./dataset/gen-word-2230-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n",
+ "Generated JSONL file with - 2090 max words, 75 samples - at ./dataset/gen-word-2090-count.jsonl\n",
+ "Generated JSONL file with - 2270 max words, 75 samples - at ./dataset/gen-word-2270-count.jsonl\n",
+ "Generated JSONL file with - 2290 max words, 75 samples - at ./dataset/gen-word-2290-count.jsonl\n",
+ "Generated JSONL file with - 2260 max words, 75 samples - at ./dataset/gen-word-2260-count.jsonl\n",
+ "Generated JSONL file with - 2200 max words, 75 samples - at ./dataset/gen-word-2200-count.jsonl\n",
+ "Generated JSONL file with - 2130 max words, 75 samples - at ./dataset/gen-word-2130-count.jsonl\n",
+ "Generated JSONL file with - 2250 max words, 75 samples - at ./dataset/gen-word-2250-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2280 max words - at ./dataset/shuffle-word-2280-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2000 max words - at ./dataset/shuffle-word-2000-count.jsonl\n",
+ "Generated a single JSONL file with 149 samples (75 token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n",
+ "Generated a single JSONL file with 148 samples (75 token repeat) - 2330 max words - at ./dataset/shuffle-word-2330-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n",
+ "Generated JSONL file with - 2300 max words, 75 samples - at ./dataset/gen-word-2300-count.jsonl\n",
+ "Generated JSONL file with - 2150 max words, 75 samples - at ./dataset/gen-word-2150-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonl\n",
+ "Generated JSONL file with - 2240 max words, 75 samples - at ./dataset/gen-word-2240-count.jsonl\n",
+ "Generated JSONL file with - 2160 max words, 75 samples - at ./dataset/gen-word-2160-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n",
+ "Generated a single JSONL file with 133 samples (75 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n",
+ "Generated JSONL file with - 4200 max words, 100 samples - at ./dataset/gen-word-4200-count.jsonl\n",
+ "Generated JSONL file with - 2280 max words, 75 samples - at ./dataset/gen-word-2280-count.jsonl\n",
+ "Generated JSONL file with - 2070 max words, 75 samples - at ./dataset/gen-word-2070-count.jsonl\n",
+ "Generated JSONL file with - 2360 max words, 75 samples - at ./dataset/gen-word-2360-count.jsonl\n",
+ "Generated a single JSONL file with 149 samples (75 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n",
+ "Generated JSONL file with - 2420 max words, 75 samples - at ./dataset/gen-word-2420-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2400 max words - at ./dataset/shuffle-word-2400-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n",
+ "Generated JSONL file with - 2390 max words, 75 samples - at ./dataset/gen-word-2390-count.jsonl\n",
+ "Generated JSONL file with - 2470 max words, 75 samples - at ./dataset/gen-word-2470-count.jsonl\n",
+ "Generated a single JSONL file with 141 samples (75 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n",
+ "Generated a single JSONL file with 137 samples (75 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n",
+ "Generated a single JSONL file with 116 samples (75 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n",
+ "Generated JSONL file with - 2520 max words, 75 samples - at ./dataset/gen-word-2520-count.jsonl\n",
+ "Generated a single JSONL file with 142 samples (75 token repeat) - 2500 max words - at ./dataset/shuffle-word-2500-count.jsonl\n",
+ "Generated JSONL file with - 2510 max words, 75 samples - at ./dataset/gen-word-2510-count.jsonl\n",
+ "Generated JSONL file with - 2450 max words, 75 samples - at ./dataset/gen-word-2450-count.jsonl\n",
+ "Generated a single JSONL file with 77 samples (75 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n",
+ "Generated a single JSONL file with 116 samples (75 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n",
+ "Generated a single JSONL file with 115 samples (75 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n",
+ "Generated a single JSONL file with 107 samples (75 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n",
+ "Generated a single JSONL file with 115 samples (75 token repeat) - 2520 max words - at ./dataset/shuffle-word-2520-count.jsonl\n",
+ "Generated a single JSONL file with 119 samples (75 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n",
+ "Generated a single JSONL file with 112 samples (75 token repeat) - 2600 max words - at ./dataset/shuffle-word-2600-count.jsonl\n",
+ "Generated a single JSONL file with 85 samples (75 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n",
+ "Generated JSONL file with - 2600 max words, 75 samples - at ./dataset/gen-word-2600-count.jsonl\n",
+ "Generated JSONL file with - 2540 max words, 75 samples - at ./dataset/gen-word-2540-count.jsonl\n",
+ "Generated JSONL file with - 2700 max words, 75 samples - at ./dataset/gen-word-2700-count.jsonl\n",
+ "Generated a single JSONL file with 111 samples (75 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n",
+ "Generated a single JSONL file with 139 samples (75 token repeat) - 2450 max words - at ./dataset/shuffle-word-2450-count.jsonl\n",
+ "Generated JSONL file with - 2340 max words, 75 samples - at ./dataset/gen-word-2340-count.jsonl\n",
+ "Generated JSONL file with - 2530 max words, 75 samples - at ./dataset/gen-word-2530-count.jsonl\n",
+ "Generated JSONL file with - 2400 max words, 75 samples - at ./dataset/gen-word-2400-count.jsonl\n",
+ "Generated a single JSONL file with 113 samples (75 token repeat) - 2560 max words - at ./dataset/shuffle-word-2560-count.jsonl\n",
+ "Generated JSONL file with - 2620 max words, 75 samples - at ./dataset/gen-word-2620-count.jsonl\n",
+ "Generated a single JSONL file with 89 samples (75 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n",
+ "Generated JSONL file with - 2610 max words, 75 samples - at ./dataset/gen-word-2610-count.jsonl\n",
+ "Generated JSONL file with - 2490 max words, 75 samples - at ./dataset/gen-word-2490-count.jsonl\n",
+ "Generated a single JSONL file with 84 samples (75 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n",
+ "Generated JSONL file with - 5475 max words, 100 samples - at ./dataset/gen-word-5475-count.jsonl\n",
+ "Generated a single JSONL file with 77 samples (75 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n",
+ "Generated JSONL file with - 2690 max words, 75 samples - at ./dataset/gen-word-2690-count.jsonl\n",
+ "Generated a single JSONL file with 89 samples (75 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n",
+ "Generated JSONL file with - 2370 max words, 75 samples - at ./dataset/gen-word-2370-count.jsonl\n",
+ "Generated JSONL file with - 2210 max words, 75 samples - at ./dataset/gen-word-2210-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n",
+ "Generated JSONL file with - 2560 max words, 75 samples - at ./dataset/gen-word-2560-count.jsonl\n",
+ "Generated JSONL file with - 2640 max words, 75 samples - at ./dataset/gen-word-2640-count.jsonl\n",
+ "Generated JSONL file with - 2730 max words, 75 samples - at ./dataset/gen-word-2730-count.jsonl\n",
+ "Generated JSONL file with - 2350 max words, 75 samples - at ./dataset/gen-word-2350-count.jsonl\n",
+ "Generated a single JSONL file with 149 samples (75 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n",
+ "Generated JSONL file with - 2670 max words, 75 samples - at ./dataset/gen-word-2670-count.jsonl\n",
+ "Generated JSONL file with - 2680 max words, 75 samples - at ./dataset/gen-word-2680-count.jsonl\n",
+ "Generated JSONL file with - 2380 max words, 75 samples - at ./dataset/gen-word-2380-count.jsonl\n",
+ "Generated a single JSONL file with 150 samples (75 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n",
+ "Generated JSONL file with - 2710 max words, 75 samples - at ./dataset/gen-word-2710-count.jsonl\n",
+ "Generated a single JSONL file with 91 samples (75 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n",
+ "Generated JSONL file with - 2320 max words, 75 samples - at ./dataset/gen-word-2320-count.jsonl\n",
+ "Generated JSONL file with - 2720 max words, 75 samples - at ./dataset/gen-word-2720-count.jsonl\n",
+ "Generated a single JSONL file with 78 samples (75 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n",
+ "Generated JSONL file with - 3575 max words, 100 samples - at ./dataset/gen-word-3575-count.jsonl\n",
+ "Generated JSONL file with - 2770 max words, 75 samples - at ./dataset/gen-word-2770-count.jsonl\n",
+ "Generated JSONL file with - 2460 max words, 75 samples - at ./dataset/gen-word-2460-count.jsonl\n",
+ "Generated JSONL file with - 2760 max words, 75 samples - at ./dataset/gen-word-2760-count.jsonl\n",
+ "Generated JSONL file with - 2740 max words, 75 samples - at ./dataset/gen-word-2740-count.jsonl\n",
+ "Generated a single JSONL file with 77 samples (75 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n",
+ "Generated JSONL file with - 2430 max words, 75 samples - at ./dataset/gen-word-2430-count.jsonl\n",
+ "Generated a single JSONL file with 80 samples (75 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n",
+ "Generated JSONL file with - 2630 max words, 75 samples - at ./dataset/gen-word-2630-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n",
+ "Generated a single JSONL file with 142 samples (75 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4150 max words - at ./dataset/shuffle-word-4150-count.jsonl\n",
+ "Generated a single JSONL file with 76 samples (75 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n",
+ "Generated JSONL file with - 2750 max words, 75 samples - at ./dataset/gen-word-2750-count.jsonl\n",
+ "Generated a single JSONL file with 77 samples (75 token repeat) - 2770 max words - at ./dataset/shuffle-word-2770-count.jsonl\n",
+ "Generated JSONL file with - 2790 max words, 75 samples - at ./dataset/gen-word-2790-count.jsonl\n",
+ "Generated JSONL file with - 2410 max words, 75 samples - at ./dataset/gen-word-2410-count.jsonl\n",
+ "Generated JSONL file with - 2800 max words, 75 samples - at ./dataset/gen-word-2800-count.jsonl\n",
+ "Generated JSONL file with - 2780 max words, 75 samples - at ./dataset/gen-word-2780-count.jsonl\n",
+ "Generated JSONL file with - 2830 max words, 75 samples - at ./dataset/gen-word-2830-count.jsonl\n",
+ "Generated a single JSONL file with 141 samples (75 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n",
+ "Generated a single JSONL file with 92 samples (75 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n",
+ "Generated JSONL file with - 2850 max words, 75 samples - at ./dataset/gen-word-2850-count.jsonl\n",
+ "Generated a single JSONL file with 76 samples (75 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n",
+ "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n",
+ "Generated JSONL file with - 2840 max words, 75 samples - at ./dataset/gen-word-2840-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n",
+ "Generated JSONL file with - 2910 max words, 75 samples - at ./dataset/gen-word-2910-count.jsonl\n",
+ "Generated JSONL file with - 2870 max words, 75 samples - at ./dataset/gen-word-2870-count.jsonl\n",
+ "Generated JSONL file with - 2880 max words, 75 samples - at ./dataset/gen-word-2880-count.jsonl\n",
+ "Generated JSONL file with - 2930 max words, 75 samples - at ./dataset/gen-word-2930-count.jsonl\n",
+ "Generated JSONL file with - 2920 max words, 75 samples - at ./dataset/gen-word-2920-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n",
+ "Generated JSONL file with - 2960 max words, 75 samples - at ./dataset/gen-word-2960-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n",
+ "Generated JSONL file with - 2950 max words, 75 samples - at ./dataset/gen-word-2950-count.jsonl\n",
+ "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n",
+ "Generated JSONL file with - 2980 max words, 75 samples - at ./dataset/gen-word-2980-count.jsonl\n",
+ "Generated a single JSONL file with 78 samples (75 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n",
+ "Generated JSONL file with - 2890 max words, 75 samples - at ./dataset/gen-word-2890-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4050 max words - at ./dataset/shuffle-word-4050-count.jsonl\n",
+ "Generated JSONL file with - 2940 max words, 75 samples - at ./dataset/gen-word-2940-count.jsonl\n",
+ "Generated JSONL file with - 2990 max words, 75 samples - at ./dataset/gen-word-2990-count.jsonl\n",
+ "Generated a single JSONL file with 76 samples (75 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n",
+ "Generated JSONL file with - 2810 max words, 75 samples - at ./dataset/gen-word-2810-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 4750 max words - at ./dataset/shuffle-word-4750-count.jsonl\n",
+ "Generated a single JSONL file with 75 samples (75 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n",
+ "Generated a single JSONL file with 87 samples (75 token repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n",
+ "Generated JSONL file with - 2860 max words, 75 samples - at ./dataset/gen-word-2860-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n",
+ "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n",
+ "Generated a single JSONL file with 100 samples (100 token repeat) - 5575 max words 
- at ./dataset/shuffle-word-5575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4325 max words - at ./dataset/shuffle-word-4325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", + "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", + "Generated JSONL file with - 3000 max words, 100 samples - at ./dataset/gen-word-3000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3125 max words - at ./dataset/shuffle-word-3125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", + "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", + "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n", "Generated JSONL file with - 3100 max words, 100 samples - at ./dataset/gen-word-3100-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3605 max words - at ./dataset/shuffle-word-3605-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3620 max words - at ./dataset/shuffle-word-3620-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3785 max words - at ./dataset/shuffle-word-3785-count.jsonl\n", - "Generated a single JSONL file with 113 samples (100 token repeat) - 2655 max words - at ./dataset/shuffle-word-2655-count.jsonl\n", - "Generated JSONL file with - 3785 max words, 100 samples - at ./dataset/gen-word-3785-count.jsonl\n", - "Generated JSONL file with - 3795 max words, 100 samples - at ./dataset/gen-word-3795-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3640 max words - at ./dataset/shuffle-word-3640-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3485 max words - at ./dataset/shuffle-word-3485-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3710 max words - at ./dataset/shuffle-word-3710-count.jsonl\n", - "Generated JSONL file with - 3630 max words, 100 samples - at ./dataset/gen-word-3630-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3795 max words - at ./dataset/shuffle-word-3795-count.jsonlGenerated JSONL file with - 3565 max words, 100 samples - at ./dataset/gen-word-3565-count.jsonl\n", - "\n", - "Generated 
JSONL file with - 3605 max words, 100 samples - at ./dataset/gen-word-3605-count.jsonl\n", - "Generated JSONL file with - 3635 max words, 100 samples - at ./dataset/gen-word-3635-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at ./dataset/shuffle-word-3675-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3695 max words - at ./dataset/shuffle-word-3695-count.jsonl\n", - "Generated a single JSONL file with 115 samples (100 token repeat) - 2675 max words - at ./dataset/shuffle-word-2675-count.jsonl\n", + "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max words - at ./dataset/shuffle-word-3475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", + "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5825 max words - at ./dataset/shuffle-word-5825-count.jsonl\n", + "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", + "Generated JSONL file with - 3125 max words, 100 samples - at ./dataset/gen-word-3125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 max words - at ./dataset/shuffle-word-3350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", + "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", + "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", + "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3800 max words - at ./dataset/shuffle-word-3800-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3645 max words - at ./dataset/shuffle-word-3645-count.jsonl\n", - "Generated JSONL file with - 3555 max words, 100 samples - at ./dataset/gen-word-3555-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3125 max words - at 
./dataset/shuffle-word-3125-count.jsonl\n", - "Generated JSONL file with - 3640 max words, 100 samples - at ./dataset/gen-word-3640-count.jsonl\n", - "Generated JSONL file with - 3730 max words, 100 samples - at ./dataset/gen-word-3730-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3680 max words - at ./dataset/shuffle-word-3680-count.jsonl\n", - "Generated JSONL file with - 3740 max words, 100 samples - at ./dataset/gen-word-3740-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3595 max words - at ./dataset/shuffle-word-3595-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3745 max words - at ./dataset/shuffle-word-3745-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3580 max words - at ./dataset/shuffle-word-3580-count.jsonl\n", - "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", - "Generated JSONL file with - 3485 max words, 100 samples - at ./dataset/gen-word-3485-count.jsonl\n", - "Generated JSONL file with - 3665 max words, 100 samples - at ./dataset/gen-word-3665-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3765 max words - at ./dataset/shuffle-word-3765-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at ./dataset/shuffle-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n", "Generated JSONL file with - 3600 max words, 100 samples - at ./dataset/gen-word-3600-count.jsonl\n", - "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", - "Generated JSONL file with - 3590 max words, 100 samples - at ./dataset/gen-word-3590-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3565 max words - at ./dataset/shuffle-word-3565-count.jsonl\n", - "Generated a single JSONL file with 186 samples (100 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3715 max words - at ./dataset/shuffle-word-3715-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3660 max words - at ./dataset/shuffle-word-3660-count.jsonl\n", - "Generated a single JSONL file with 186 samples (100 token repeat) - 2485 max words - at ./dataset/shuffle-word-2485-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3585 max words - at ./dataset/shuffle-word-3585-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3190 max words - at ./dataset/shuffle-word-3190-count.jsonl\n", + "Generated JSONL file with - 3775 max words, 100 samples - at ./dataset/gen-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at ./dataset/shuffle-word-3650-count.jsonl\n", + "Generated JSONL file with - 3200 max words, 100 samples - at ./dataset/gen-word-3200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 
3850 max words - at ./dataset/shuffle-word-3850-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2890 max words - at ./dataset/shuffle-word-2890-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n", + "Generated JSONL file with - 2900 max words, 75 samples - at ./dataset/gen-word-2900-count.jsonl\n", + "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n", + "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonlGenerated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n", + "\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 3625 max words - at ./dataset/shuffle-word-3625-count.jsonl\n", - "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3980 max words - at ./dataset/shuffle-word-3980-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3685 max words - at ./dataset/shuffle-word-3685-count.jsonl\n", - "Generated JSONL file with - 2485 max words, 100 samples - at ./dataset/gen-word-2485-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3245 max words - at ./dataset/shuffle-word-3245-count.jsonl\n", - "Generated JSONL file with - 3885 max words, 100 samples - at ./dataset/gen-word-3885-count.jsonl\n", - "Generated JSONL file with - 3580 max words, 100 samples - at ./dataset/gen-word-3580-count.jsonl\n", - "Generated JSONL file with - 3575 max words, 100 samples - at ./dataset/gen-word-3575-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3770 max words - at ./dataset/shuffle-word-3770-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3560 max words - at ./dataset/shuffle-word-3560-count.jsonl\n", - "Generated JSONL file with - 3820 max words, 100 samples - at ./dataset/gen-word-3820-count.jsonl\n", - "Generated JSONL file with - 3505 max words, 100 samples - at ./dataset/gen-word-3505-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3610 max words - at ./dataset/shuffle-word-3610-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3665 max words - at ./dataset/shuffle-word-3665-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3720 max words - at ./dataset/shuffle-word-3720-count.jsonl\n", - "Generated JSONL file with - 3845 max words, 100 samples - at ./dataset/gen-word-3845-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3865 max words - at ./dataset/shuffle-word-3865-count.jsonl\n", - "Generated a single JSONL file with 125 samples (100 token 
repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3410 max words - at ./dataset/shuffle-word-3410-count.jsonl\n", - "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", - "Generated a single JSONL file with 152 samples (100 token repeat) - 2575 max words - at ./dataset/shuffle-word-2575-count.jsonl\n", - "Generated JSONL file with - 3620 max words, 100 samples - at ./dataset/gen-word-3620-count.jsonl\n", - "Generated JSONL file with - 3390 max words, 100 samples - at ./dataset/gen-word-3390-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4580 max words - at ./dataset/shuffle-word-4580-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3810 max words - at ./dataset/shuffle-word-3810-count.jsonlGenerated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at ./dataset/shuffle-word-3650-count.jsonl\n", - "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n", - "Generated JSONL file with - 3515 max words, 100 samples - at ./dataset/gen-word-3515-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3435 max words - at ./dataset/shuffle-word-3435-count.jsonl\n", - "Generated JSONL file with - 3790 max words, 100 samples - at ./dataset/gen-word-3790-count.jsonl\n", + "Generated JSONL file with - 5650 max words, 100 samples - at ./dataset/gen-word-5650-count.jsonl\n", + "Generated JSONL file with - 3075 max words, 100 samples - at ./dataset/gen-word-3075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at ./dataset/shuffle-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n", + "Generated JSONL file with - 4425 max words, 100 samples - at ./dataset/gen-word-4425-count.jsonl\n", "Generated JSONL file with - 3725 max words, 100 samples - at ./dataset/gen-word-3725-count.jsonl\n", - "Generated a single JSONL file with 103 samples (100 token repeat) - 2715 max words - at ./dataset/shuffle-word-2715-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5375 max words - at ./dataset/shuffle-word-5375-count.jsonl\n", + "Generated JSONL file with - 3000 max words, 75 samples - at ./dataset/gen-word-3000-count.jsonl\n", + "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", + "Generated JSONL file with - 3925 max words, 100 samples - at ./dataset/gen-word-3925-count.jsonl\n", + "Generated JSONL file with - 5250 max words, 100 samples - at ./dataset/gen-word-5250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n", + "Generated JSONL file with - 3875 max words, 100 samples - at ./dataset/gen-word-3875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4225 max words - at ./dataset/shuffle-word-4225-count.jsonl\n", + "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", + "Generated JSONL file with - 3850 max words, 100 samples - at 
./dataset/gen-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4175 max words - at ./dataset/shuffle-word-4175-count.jsonl\n", + "Generated JSONL file with - 5350 max words, 100 samples - at ./dataset/gen-word-5350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4025 max words - at ./dataset/shuffle-word-4025-count.jsonl\n", + "Generated JSONL file with - 3900 max words, 100 samples - at ./dataset/gen-word-3900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4250 max words - at ./dataset/shuffle-word-4250-count.jsonl\n", "Generated JSONL file with - 3675 max words, 100 samples - at ./dataset/gen-word-3675-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3690 max words - at ./dataset/shuffle-word-3690-count.jsonl\n", + "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4400 max words - at ./dataset/shuffle-word-4400-count.jsonl\n", "Generated JSONL file with - 3650 max words, 100 samples - at ./dataset/gen-word-3650-count.jsonl\n", - "\n", - "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonl\n", - "Generated JSONL file with - 3520 max words, 100 samples - at ./dataset/gen-word-3520-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3415 max words - at ./dataset/shuffle-word-3415-count.jsonl\n", - "Generated a single JSONL file with 149 samples (100 token repeat) - 2515 max words - at ./dataset/shuffle-word-2515-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3970 max words - at ./dataset/shuffle-word-3970-count.jsonlGenerated JSONL file with - 3770 max words, 100 samples - at ./dataset/gen-word-3770-count.jsonl\n", - "Generated JSONL file with - 3480 max words, 100 samples - at ./dataset/gen-word-3480-count.jsonl\n", - "\n", - "Generated a single JSONL file with 65288 samples (500 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n", - "Generated JSONL file with - 3685 max words, 100 samples - at ./dataset/gen-word-3685-count.jsonl\n", - "Generated JSONL file with - 3760 max words, 100 samples - at ./dataset/gen-word-3760-count.jsonl\n", - "Generated a single JSONL file with 148 samples (100 token repeat) - 2560 max words - at ./dataset/shuffle-word-2560-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4385 max words - at ./dataset/shuffle-word-4385-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4440 max words - at ./dataset/shuffle-word-4440-count.jsonl\n", - "Generated JSONL file with - 3755 max words, 100 samples - at ./dataset/gen-word-3755-count.jsonl\n", - "Generated JSONL file with - 3585 max words, 100 samples - at ./dataset/gen-word-3585-count.jsonl\n", - "Generated JSONL file with - 4580 max words, 100 samples - at ./dataset/gen-word-4580-count.jsonl\n", - "Generated a single JSONL file with 123 samples (100 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n", - "Generated a single JSONL 
file with 100 samples (100 token repeat) - 3430 max words - at ./dataset/shuffle-word-3430-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4930 max words - at ./dataset/shuffle-word-4930-count.jsonl\n", - "Generated JSONL file with - 3720 max words, 100 samples - at ./dataset/gen-word-3720-count.jsonl\n", - "Generated JSONL file with - 3690 max words, 100 samples - at ./dataset/gen-word-3690-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3805 max words - at ./dataset/shuffle-word-3805-count.jsonl\n", - "Generated JSONL file with - 3830 max words, 100 samples - at ./dataset/gen-word-3830-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4590 max words - at ./dataset/shuffle-word-4590-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4560 max words - at ./dataset/shuffle-word-4560-count.jsonl\n", - "Generated JSONL file with - 3570 max words, 100 samples - at ./dataset/gen-word-3570-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4675 max words - at ./dataset/shuffle-word-4675-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5035 max words - at ./dataset/shuffle-word-5035-count.jsonl\n", - "Generated JSONL file with - 3855 max words, 100 samples - at ./dataset/gen-word-3855-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5110 max words - at ./dataset/shuffle-word-5110-count.jsonl\n", - "Generated JSONL file with - 3530 max words, 100 samples - at ./dataset/gen-word-3530-count.jsonl\n", - "Generated JSONL file with - 4545 max words, 100 samples - at ./dataset/gen-word-4545-count.jsonl\n", - "Generated JSONL file with - 3840 max words, 100 samples - at ./dataset/gen-word-3840-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5185 max words - at ./dataset/shuffle-word-5185-count.jsonl\n", - "Generated JSONL file with - 2505 max words, 100 samples - at ./dataset/gen-word-2505-count.jsonl\n", - "Generated a single JSONL file with 199 samples (100 token repeat) - 2400 max words - at ./dataset/shuffle-word-2400-count.jsonl\n", - "Generated a single JSONL file with 121 samples (100 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n", - "Generated JSONL file with - 3810 max words, 100 samples - at ./dataset/gen-word-3810-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n", - "Generated JSONL file with - 4560 max words, 100 samples - at ./dataset/gen-word-4560-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3830 max words - at ./dataset/shuffle-word-3830-count.jsonl\n", - "Generated JSONL file with - 3595 max words, 100 samples - at ./dataset/gen-word-3595-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2725 max words - at ./dataset/shuffle-word-2725-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3910 max words - at ./dataset/shuffle-word-3910-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4870 max words - at ./dataset/shuffle-word-4870-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4355 max words - at ./dataset/shuffle-word-4355-count.jsonl\n", + "Generated JSONL file with - 3225 max 
words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 4700 max words - at ./dataset/shuffle-word-4700-count.jsonl\n", - "Generated JSONL file with - 3615 max words, 100 samples - at ./dataset/gen-word-3615-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4635 max words - at ./dataset/shuffle-word-4635-count.jsonl\n", - "Generated JSONL file with - 4875 max words, 100 samples - at ./dataset/gen-word-4875-count.jsonl\n", - "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5010 max words - at ./dataset/shuffle-word-5010-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5050 max words - at ./dataset/shuffle-word-5050-count.jsonl\n", - "Generated JSONL file with - 5035 max words, 100 samples - at ./dataset/gen-word-5035-count.jsonl\n", - "Generated a single JSONL file with 105 samples (100 token repeat) - 2755 max words - at ./dataset/shuffle-word-2755-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4485 max words - at ./dataset/shuffle-word-4485-count.jsonl\n", - "Generated JSONL file with - 4605 max words, 100 samples - at ./dataset/gen-word-4605-count.jsonl\n", - "Generated a single JSONL file with 152 samples (100 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n", - "Generated a single JSONL file with 121 samples (100 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5115 max words - at ./dataset/shuffle-word-5115-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5120 max words - at ./dataset/shuffle-word-5120-count.jsonl\n", - "Generated JSONL file with - 5130 max words, 100 samples - at ./dataset/gen-word-5130-count.jsonlGenerated a single JSONL file with 100 samples (100 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5055 max words - at ./dataset/shuffle-word-5055-count.jsonl\n", - "\n", - "Generated JSONL file with - 5185 max words, 100 samples - at ./dataset/gen-word-5185-count.jsonl\n", - "Generated JSONL file with - 5125 max words, 100 samples - at ./dataset/gen-word-5125-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2745 max words - at ./dataset/shuffle-word-2745-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5060 max words - at ./dataset/shuffle-word-5060-count.jsonl\n", - "Generated a single JSONL file with 118 samples (100 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4850 max words - at ./dataset/shuffle-word-4850-count.jsonl\n", - "Generated JSONL file with - 3660 max words, 100 samples - at ./dataset/gen-word-3660-count.jsonl\n", - "Generated a single JSONL file with 114 samples (100 token repeat) - 2695 max words - at ./dataset/shuffle-word-2695-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3860 max words - at ./dataset/shuffle-word-3860-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4715 max words - at 
./dataset/shuffle-word-4715-count.jsonl\n", - "Generated a single JSONL file with 149 samples (100 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n", - "Generated a single JSONL file with 104 samples (100 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n", - "Generated a single JSONL file with 121 samples (100 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n", - "Generated JSONL file with - 3670 max words, 100 samples - at ./dataset/gen-word-3670-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5075 max words - at ./dataset/shuffle-word-5075-count.jsonl\n", - "Generated JSONL file with - 2675 max words, 100 samples - at ./dataset/gen-word-2675-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 4375 max words - at ./dataset/shuffle-word-4375-count.jsonl\n", - "Generated JSONL file with - 2490 max words, 100 samples - at ./dataset/gen-word-2490-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3310 max words - at ./dataset/shuffle-word-3310-count.jsonl\n", - "Generated JSONL file with - 4600 max words, 100 samples - at ./dataset/gen-word-4600-count.jsonl\n", - "Generated JSONL file with - 2520 max words, 100 samples - at ./dataset/gen-word-2520-count.jsonl\n", - "Generated JSONL file with - 4385 max words, 100 samples - at ./dataset/gen-word-4385-count.jsonl\n", - "Generated a single JSONL file with 150 samples (100 token repeat) - 2520 max words - at ./dataset/shuffle-word-2520-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5240 max words - at ./dataset/shuffle-word-5240-count.jsonl\n", - "Generated JSONL file with - 4445 max words, 100 samples - at ./dataset/gen-word-4445-count.jsonl\n", - "Generated JSONL file with - 4565 max words, 100 samples - at ./dataset/gen-word-4565-count.jsonl\n", - "Generated JSONL file with - 3695 max words, 100 samples - at ./dataset/gen-word-3695-count.jsonl\n", - "Generated JSONL file with - 2685 max words, 100 samples - at ./dataset/gen-word-2685-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4605 max words - at ./dataset/shuffle-word-4605-count.jsonl\n", - "Generated a single JSONL file with 121 samples (100 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n", - "Generated JSONL file with - 3645 max words, 100 samples - at ./dataset/gen-word-3645-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5125 max words - at ./dataset/shuffle-word-5125-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3840 max words - at ./dataset/shuffle-word-3840-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5565 max words - at ./dataset/shuffle-word-5565-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5290 max words - at ./dataset/shuffle-word-5290-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4410 max words - at ./dataset/shuffle-word-4410-count.jsonl\n", - "Generated a single JSONL file with 88977 samples (500 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5340 max words - at ./dataset/shuffle-word-5340-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4610 
max words - at ./dataset/shuffle-word-4610-count.jsonlGenerated a single JSONL file with 104 samples (100 token repeat) - 2765 max words - at ./dataset/shuffle-word-2765-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2795 max words - at ./dataset/shuffle-word-2795-count.jsonl\n", - "\n", - "Generated JSONL file with - 2690 max words, 100 samples - at ./dataset/gen-word-2690-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5180 max words - at ./dataset/shuffle-word-5180-count.jsonl\n", - "Generated JSONL file with - 3815 max words, 100 samples - at ./dataset/gen-word-3815-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5020 max words - at ./dataset/shuffle-word-5020-count.jsonl\n", - "Generated JSONL file with - 5110 max words, 100 samples - at ./dataset/gen-word-5110-count.jsonl\n", - "Generated a single JSONL file with 52995 samples (500 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3015 max words - at ./dataset/shuffle-word-3015-count.jsonl\n", - "Generated JSONL file with - 3270 max words, 100 samples - at ./dataset/gen-word-3270-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4475 max words - at ./dataset/shuffle-word-4475-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n", - "Generated JSONL file with - 3225 max words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n", - "Generated JSONL file with - 2740 max words, 100 samples - at ./dataset/gen-word-2740-count.jsonlGenerated a single JSONL file with 100 samples (100 token repeat) - 3095 max words - at ./dataset/shuffle-word-3095-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4725 max words - at ./dataset/shuffle-word-4725-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4670 max words - at ./dataset/shuffle-word-4670-count.jsonl\n", - "Generated a single JSONL file with 105 samples (100 token repeat) - 2785 max words - at ./dataset/shuffle-word-2785-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5445 max words - at ./dataset/shuffle-word-5445-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3040 max words - at ./dataset/shuffle-word-3040-count.jsonl\n", - "Generated JSONL file with - 2670 max words, 100 samples - at ./dataset/gen-word-2670-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4615 max words - at ./dataset/shuffle-word-4615-count.jsonl\n", - "Generated JSONL file with - 4935 max words, 100 samples - at ./dataset/gen-word-4935-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3895 max words - at ./dataset/shuffle-word-3895-count.jsonl\n", - "Generated JSONL file with - 3445 max words, 100 samples - at ./dataset/gen-word-3445-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4035 max words - at ./dataset/shuffle-word-4035-count.jsonl\n", - 
"Generated a single JSONL file with 102 samples (100 token repeat) - 2775 max words - at ./dataset/shuffle-word-2775-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4885 max words - at ./dataset/shuffle-word-4885-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2705 max words - at ./dataset/shuffle-word-2705-count.jsonl\n", - "Generated JSONL file with - 3260 max words, 100 samples - at ./dataset/gen-word-3260-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4200 max words - at ./dataset/shuffle-word-4200-count.jsonl\n", - "Generated JSONL file with - 4645 max words, 100 samples - at ./dataset/gen-word-4645-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5085 max words - at ./dataset/shuffle-word-5085-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5470 max words - at ./dataset/shuffle-word-5470-count.jsonl\n", - "Generated JSONL file with - 4615 max words, 100 samples - at ./dataset/gen-word-4615-count.jsonl\n", - "Generated a single JSONL file with 105 samples (100 token repeat) - 2770 max words - at ./dataset/shuffle-word-2770-count.jsonl\n", - "Generated JSONL file with - 4040 max words, 100 samples - at ./dataset/gen-word-4040-count.jsonl\n", - "Generated JSONL file with - 5885 max words, 100 samples - at ./dataset/gen-word-5885-count.jsonl\n", - "Generated JSONL file with - 3430 max words, 100 samples - at ./dataset/gen-word-3430-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2845 max words - at ./dataset/shuffle-word-2845-count.jsonl\n", - "Generated JSONL file with - 2370 max words, 100 samples - at ./dataset/gen-word-2370-count.jsonl\n", - "\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4310 max words - at ./dataset/shuffle-word-4310-count.jsonl\n", - "Generated JSONL file with - 4335 max words, 100 samples - at ./dataset/gen-word-4335-count.jsonl\n", - "Generated JSONL file with - 5895 max words, 100 samples - at ./dataset/gen-word-5895-count.jsonlGenerated JSONL file with - 5120 max words, 100 samples - at ./dataset/gen-word-5120-count.jsonl\n", - "\n", - "Generated JSONL file with - 2525 max words, 100 samples - at ./dataset/gen-word-2525-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3260 max words - at ./dataset/shuffle-word-3260-count.jsonl\n", - "Generated JSONL file with - 2500 max words, 100 samples - at ./dataset/gen-word-2500-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5015 max words - at ./dataset/shuffle-word-5015-count.jsonl\n", - "Generated JSONL file with - 5010 max words, 100 samples - at ./dataset/gen-word-5010-count.jsonl\n", - "Generated JSONL file with - 5060 max words, 100 samples - at ./dataset/gen-word-5060-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5880 max words - at ./dataset/shuffle-word-5880-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4695 max words - at ./dataset/shuffle-word-4695-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token 
repeat) - 4680 max words - at ./dataset/shuffle-word-4680-count.jsonl\n", - "Generated JSONL file with - 4365 max words, 100 samples - at ./dataset/gen-word-4365-count.jsonl\n", - "Generated JSONL file with - 2630 max words, 100 samples - at ./dataset/gen-word-2630-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4790 max words - at ./dataset/shuffle-word-4790-count.jsonl\n", - "Generated JSONL file with - 5055 max words, 100 samples - at ./dataset/gen-word-5055-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5375 max words - at ./dataset/shuffle-word-5375-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4080 max words - at ./dataset/shuffle-word-4080-count.jsonl\n", - "Generated a single JSONL file with 43603 samples (500 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", - "Generated JSONL file with - 5040 max words, 100 samples - at ./dataset/gen-word-5040-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4780 max words - at ./dataset/shuffle-word-4780-count.jsonl\n", - "Generated JSONL file with - 3420 max words, 100 samples - at ./dataset/gen-word-3420-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5760 max words - at ./dataset/shuffle-word-5760-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5845 max words - at ./dataset/shuffle-word-5845-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4975 max words - at ./dataset/shuffle-word-4975-count.jsonl\n", - "Generated JSONL file with - 4675 max words, 100 samples - at ./dataset/gen-word-4675-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5970 max words - at ./dataset/shuffle-word-5970-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 max words - at ./dataset/shuffle-word-3350-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n", - "Generated JSONL file with - 4665 max words, 100 samples - at ./dataset/gen-word-4665-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4705 max words - at ./dataset/shuffle-word-4705-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3890 max words - at ./dataset/shuffle-word-3890-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4545 max words - at ./dataset/shuffle-word-4545-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3130 max words - at ./dataset/shuffle-word-3130-count.jsonl\n", - "Generated JSONL file with - 5155 max words, 100 samples - at ./dataset/gen-word-5155-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4210 max words - at ./dataset/shuffle-word-4210-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4400 max words - at ./dataset/shuffle-word-4400-count.jsonl\n", - "Generated JSONL file with - 4155 max words, 100 samples - at ./dataset/gen-word-4155-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3360 max words - at ./dataset/shuffle-word-3360-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4250 max words - at 
./dataset/shuffle-word-4250-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5560 max words - at ./dataset/shuffle-word-5560-count.jsonl\n", - "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n", - "Generated JSONL file with - 5190 max words, 100 samples - at ./dataset/gen-word-5190-count.jsonl\n", - "Generated JSONL file with - 4840 max words, 100 samples - at ./dataset/gen-word-4840-count.jsonl\n", - "Generated JSONL file with - 3000 max words, 100 samples - at ./dataset/gen-word-3000-count.jsonl\n", - "Generated JSONL file with - 4070 max words, 100 samples - at ./dataset/gen-word-4070-count.jsonl\n", - "Generated JSONL file with - 4420 max words, 100 samples - at ./dataset/gen-word-4420-count.jsonl\n", - "Generated JSONL file with - 4515 max words, 100 samples - at ./dataset/gen-word-4515-count.jsonl\n", - "Generated JSONL file with - 4860 max words, 100 samples - at ./dataset/gen-word-4860-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3185 max words - at ./dataset/shuffle-word-3185-count.jsonl\n", - "Generated JSONL file with - 3415 max words, 100 samples - at ./dataset/gen-word-3415-count.jsonl\n", - "Generated a single JSONL file with 102 samples (100 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n", - "Generated JSONL file with - 2915 max words, 100 samples - at ./dataset/gen-word-2915-count.jsonl\n", - "Generated JSONL file with - 4995 max words, 100 samples - at ./dataset/gen-word-4995-count.jsonlGenerated JSONL file with - 2950 max words, 100 samples - at ./dataset/gen-word-2950-count.jsonlGenerated JSONL file with - 5115 max words, 100 samples - at ./dataset/gen-word-5115-count.jsonl\n", - "\n", - "\n", - "Generated JSONL file with - 4405 max words, 100 samples - at ./dataset/gen-word-4405-count.jsonl\n", - "Generated JSONL file with - 2660 max words, 100 samples - at ./dataset/gen-word-2660-count.jsonl\n", - "Generated JSONL file with - 4660 max words, 100 samples - at ./dataset/gen-word-4660-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4110 max words - at ./dataset/shuffle-word-4110-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2935 max words - at ./dataset/shuffle-word-2935-count.jsonl\n", - "Generated JSONL file with - 2775 max words, 100 samples - at ./dataset/gen-word-2775-count.jsonl\n", - "Generated JSONL file with - 2645 max words, 100 samples - at ./dataset/gen-word-2645-count.jsonl\n", - "Generated JSONL file with - 5470 max words, 100 samples - at ./dataset/gen-word-5470-count.jsonl\n", - "Generated JSONL file with - 2900 max words, 100 samples - at ./dataset/gen-word-2900-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5250 max words - at ./dataset/shuffle-word-5250-count.jsonl\n", - "Generated JSONL file with - 2725 max words, 100 samples - at ./dataset/gen-word-2725-count.jsonl\n", - "Generated JSONL file with - 4190 max words, 100 samples - at ./dataset/gen-word-4190-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3065 max words - at ./dataset/shuffle-word-3065-count.jsonlGenerated 
JSONL file with - 3115 max words, 100 samples - at ./dataset/gen-word-3115-count.jsonl\n", - "\n", - "Generated a single JSONL file with 130339 samples (500 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4785 max words - at ./dataset/shuffle-word-4785-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4795 max words - at ./dataset/shuffle-word-4795-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5005 max words - at ./dataset/shuffle-word-5005-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5490 max words - at ./dataset/shuffle-word-5490-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4415 max words - at ./dataset/shuffle-word-4415-count.jsonl\n", - "Generated JSONL file with - 5450 max words, 100 samples - at ./dataset/gen-word-5450-count.jsonl\n", - "Generated JSONL file with - 4260 max words, 100 samples - at ./dataset/gen-word-4260-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3220 max words - at ./dataset/shuffle-word-3220-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", - "Generated JSONL file with - 2865 max words, 100 samples - at ./dataset/gen-word-2865-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5045 max words - at ./dataset/shuffle-word-5045-count.jsonl\n", - "Generated JSONL file with - 4535 max words, 100 samples - at ./dataset/gen-word-4535-count.jsonl\n", - "Generated JSONL file with - 2855 max words, 100 samples - at ./dataset/gen-word-2855-count.jsonl\n", - "Generated JSONL file with - 2455 max words, 100 samples - at ./dataset/gen-word-2455-count.jsonl\n", - "Generated JSONL file with - 5080 max words, 100 samples - at ./dataset/gen-word-5080-count.jsonl\n", - "Generated JSONL file with - 4095 max words, 100 samples - at ./dataset/gen-word-4095-count.jsonl\n", - "Generated a single JSONL file with 103 samples (100 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2915 max words - at ./dataset/shuffle-word-2915-count.jsonl\n", - "Generated JSONL file with - 2870 max words, 100 samples - at ./dataset/gen-word-2870-count.jsonl\n", - "Generated a single JSONL file with 198 samples (100 token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n", - "Generated JSONL file with - 2715 max words, 100 samples - at ./dataset/gen-word-2715-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", - "Generated JSONL file with - 3215 max words, 100 samples - at ./dataset/gen-word-3215-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5665 max words - at ./dataset/shuffle-word-5665-count.jsonl\n", - "Generated JSONL file with - 4695 max words, 100 samples - at ./dataset/gen-word-4695-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2925 max words - at ./dataset/shuffle-word-2925-count.jsonl\n", - "Generated a single JSONL file with 100 
samples (100 token repeat) - 5000 max words - at ./dataset/shuffle-word-5000-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3085 max words - at ./dataset/shuffle-word-3085-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2815 max words - at ./dataset/shuffle-word-2815-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3855 max words - at ./dataset/shuffle-word-3855-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4865 max words - at ./dataset/shuffle-word-4865-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2835 max words - at ./dataset/shuffle-word-2835-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3365 max words - at ./dataset/shuffle-word-3365-count.jsonl\n", - "Generated JSONL file with - 2875 max words, 100 samples - at ./dataset/gen-word-2875-count.jsonl\n", - "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", - "Generated JSONL file with - 5525 max words, 100 samples - at ./dataset/gen-word-5525-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2945 max words - at ./dataset/shuffle-word-2945-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4920 max words - at ./dataset/shuffle-word-4920-count.jsonl\n", - "Generated JSONL file with - 2940 max words, 100 samples - at ./dataset/gen-word-2940-count.jsonl\n", - "Generated JSONL file with - 4650 max words, 100 samples - at ./dataset/gen-word-4650-count.jsonl\n", - "Generated JSONL file with - 2720 max words, 100 samples - at ./dataset/gen-word-2720-count.jsonl\n", - "Generated JSONL file with - 2555 max words, 100 samples - at ./dataset/gen-word-2555-count.jsonl\n", - "Generated JSONL file with - 2755 max words, 100 samples - at ./dataset/gen-word-2755-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3230 max words - at ./dataset/shuffle-word-3230-count.jsonl\n", - "Generated JSONL file with - 2985 max words, 100 samples - at ./dataset/gen-word-2985-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4750 max words - at ./dataset/shuffle-word-4750-count.jsonl\n", - "Generated JSONL file with - 5565 max words, 100 samples - at ./dataset/gen-word-5565-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4090 max words - at ./dataset/shuffle-word-4090-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4395 max words - at ./dataset/shuffle-word-4395-count.jsonl\n", - "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5380 max words - at ./dataset/shuffle-word-5380-count.jsonl\n", - "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5130 max words - at ./dataset/shuffle-word-5130-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", - "Generated JSONL file with - 2640 max words, 100 samples - at ./dataset/gen-word-2640-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3285 max 
words - at ./dataset/shuffle-word-3285-count.jsonl\n", - "Generated JSONL file with - 4920 max words, 100 samples - at ./dataset/gen-word-4920-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5965 max words - at ./dataset/shuffle-word-5965-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3885 max words - at ./dataset/shuffle-word-3885-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3030 max words - at ./dataset/shuffle-word-3030-count.jsonl\n", - "Generated a single JSONL file with 123 samples (100 token repeat) - 2685 max words - at ./dataset/shuffle-word-2685-count.jsonl\n", - "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4275 max words - at ./dataset/shuffle-word-4275-count.jsonl\n", + "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 4075 max words - at ./dataset/shuffle-word-4075-count.jsonl\n", - "Generated JSONL file with - 5570 max words, 100 samples - at ./dataset/gen-word-5570-count.jsonl\n", - "Generated JSONL file with - 5625 max words, 100 samples - at ./dataset/gen-word-5625-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3940 max words - at ./dataset/shuffle-word-3940-count.jsonl\n", - "Generated JSONL file with - 4020 max words, 100 samples - at ./dataset/gen-word-4020-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4330 max words - at ./dataset/shuffle-word-4330-count.jsonl\n", - "Generated JSONL file with - 3230 max words, 100 samples - at ./dataset/gen-word-3230-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4345 max words - at ./dataset/shuffle-word-4345-count.jsonl\n", - "Generated JSONL file with - 5140 max words, 100 samples - at ./dataset/gen-word-5140-count.jsonl\n", - "Generated JSONL file with - 5850 max words, 100 samples - at ./dataset/gen-word-5850-count.jsonl\n", - "Generated JSONL file with - 2780 max words, 100 samples - at ./dataset/gen-word-2780-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3915 max words - at ./dataset/shuffle-word-3915-count.jsonl\n", - "Generated JSONL file with - 4910 max words, 100 samples - at ./dataset/gen-word-4910-count.jsonl\n", - "Generated JSONL file with - 2880 max words, 100 samples - at ./dataset/gen-word-2880-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3945 max words - at ./dataset/shuffle-word-3945-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5385 max words - at ./dataset/shuffle-word-5385-count.jsonlGenerated JSONL file with - 5100 max words, 100 samples - at ./dataset/gen-word-5100-count.jsonl\n", - "\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5605 max words - at ./dataset/shuffle-word-5605-count.jsonl\n", - "Generated JSONL file with - 2955 max words, 100 samples - at ./dataset/gen-word-2955-count.jsonl\n", - "Generated JSONL file with - 5270 max words, 100 samples - at ./dataset/gen-word-5270-count.jsonl\n", - "Generated JSONL file with - 4490 max words, 100 
samples - at ./dataset/gen-word-4490-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3055 max words - at ./dataset/shuffle-word-3055-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4085 max words - at ./dataset/shuffle-word-4085-count.jsonl\n", - "Generated JSONL file with - 5315 max words, 100 samples - at ./dataset/gen-word-5315-count.jsonl\n", - "Generated JSONL file with - 2680 max words, 100 samples - at ./dataset/gen-word-2680-count.jsonl\n", - "Generated JSONL file with - 5880 max words, 100 samples - at ./dataset/gen-word-5880-count.jsonl\n", - "Generated JSONL file with - 5965 max words, 100 samples - at ./dataset/gen-word-5965-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4015 max words - at ./dataset/shuffle-word-4015-count.jsonl\n", - "Generated JSONL file with - 4930 max words, 100 samples - at ./dataset/gen-word-4930-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4445 max words - at ./dataset/shuffle-word-4445-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4165 max words - at ./dataset/shuffle-word-4165-count.jsonl\n", - "Generated JSONL file with - 3005 max words, 100 samples - at ./dataset/gen-word-3005-count.jsonl\n", - "Generated JSONL file with - 4195 max words, 100 samples - at ./dataset/gen-word-4195-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4475 max words - at ./dataset/shuffle-word-4475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4525 max words - at ./dataset/shuffle-word-4525-count.jsonl\n", "Generated JSONL file with - 4025 max words, 100 samples - at ./dataset/gen-word-4025-count.jsonl\n", - "Generated JSONL file with - 5240 max words, 100 samples - at ./dataset/gen-word-5240-count.jsonl\n", - "Generated JSONL file with - 2635 max words, 100 samples - at ./dataset/gen-word-2635-count.jsonl\n", - "Generated JSONL file with - 5335 max words, 100 samples - at ./dataset/gen-word-5335-count.jsonl\n", - "Generated JSONL file with - 2480 max words, 100 samples - at ./dataset/gen-word-2480-count.jsonl\n", - "Generated JSONL file with - 5600 max words, 100 samples - at ./dataset/gen-word-5600-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4240 max words - at ./dataset/shuffle-word-4240-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5550 max words - at ./dataset/shuffle-word-5550-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4290 max words - at ./dataset/shuffle-word-4290-count.jsonl\n", - "Generated JSONL file with - 4395 max words, 100 samples - at ./dataset/gen-word-4395-count.jsonl\n", - "Generated JSONL file with - 4375 max words, 100 samples - at ./dataset/gen-word-4375-count.jsonl\n", - "Generated JSONL file with - 2825 max words, 100 samples - at ./dataset/gen-word-2825-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4225 max words - at ./dataset/shuffle-word-4225-count.jsonl\n", - "Generated JSONL file with - 4370 max words, 100 samples - at 
./dataset/gen-word-4370-count.jsonl\n", - "Generated JSONL file with - 4900 max words, 100 samples - at ./dataset/gen-word-4900-count.jsonl\n", - "Generated JSONL file with - 4285 max words, 100 samples - at ./dataset/gen-word-4285-count.jsonl\n", - "Generated JSONL file with - 5615 max words, 100 samples - at ./dataset/gen-word-5615-count.jsonl\n", - "Generated JSONL file with - 2785 max words, 100 samples - at ./dataset/gen-word-2785-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", - "Generated JSONL file with - 4755 max words, 100 samples - at ./dataset/gen-word-4755-count.jsonl\n", - "Generated JSONL file with - 5845 max words, 100 samples - at ./dataset/gen-word-5845-count.jsonl\n", - "Generated JSONL file with - 5855 max words, 100 samples - at ./dataset/gen-word-5855-count.jsonl\n", - "Generated JSONL file with - 5775 max words, 100 samples - at ./dataset/gen-word-5775-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4360 max words - at ./dataset/shuffle-word-4360-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2985 max words - at ./dataset/shuffle-word-2985-count.jsonl\n", - "Generated JSONL file with - 2885 max words, 100 samples - at ./dataset/gen-word-2885-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4135 max words - at ./dataset/shuffle-word-4135-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5210 max words - at ./dataset/shuffle-word-5210-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5355 max words - at ./dataset/shuffle-word-5355-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4030 max words - at ./dataset/shuffle-word-4030-count.jsonl\n", - "Generated JSONL file with - 4400 max words, 100 samples - at ./dataset/gen-word-4400-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5670 max words - at ./dataset/shuffle-word-5670-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", - "Generated JSONL file with - 4200 max words, 100 samples - at ./dataset/gen-word-4200-count.jsonl\n", - "Generated JSONL file with - 4125 max words, 100 samples - at ./dataset/gen-word-4125-count.jsonl\n", - "Generated JSONL file with - 5015 max words, 100 samples - at ./dataset/gen-word-5015-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3380 max words - at ./dataset/shuffle-word-3380-count.jsonl\n", - "Generated JSONL file with - 4205 max words, 100 samples - at ./dataset/gen-word-4205-count.jsonl\n", - "Generated JSONL file with - 3890 max words, 100 samples - at ./dataset/gen-word-3890-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4100 max words - at ./dataset/shuffle-word-4100-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3215 max words - at ./dataset/shuffle-word-3215-count.jsonl\n", - "Generated JSONL file with - 3120 max words, 100 samples - at 
./dataset/gen-word-3120-count.jsonl\n", - "Generated JSONL file with - 3805 max words, 100 samples - at ./dataset/gen-word-3805-count.jsonl\n", - "Generated JSONL file with - 3055 max words, 100 samples - at ./dataset/gen-word-3055-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3395 max words - at ./dataset/shuffle-word-3395-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3845 max words - at ./dataset/shuffle-word-3845-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2995 max words - at ./dataset/shuffle-word-2995-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4245 max words - at ./dataset/shuffle-word-4245-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2805 max words - at ./dataset/shuffle-word-2805-count.jsonl\n", - "Generated JSONL file with - 3355 max words, 100 samples - at ./dataset/gen-word-3355-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2895 max words - at ./dataset/shuffle-word-2895-count.jsonl\n", - "Generated JSONL file with - 3180 max words, 100 samples - at ./dataset/gen-word-3180-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2905 max words - at ./dataset/shuffle-word-2905-count.jsonl\n", - "Generated JSONL file with - 3040 max words, 100 samples - at ./dataset/gen-word-3040-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4235 max words - at ./dataset/shuffle-word-4235-count.jsonl\n", - "Generated JSONL file with - 3185 max words, 100 samples - at ./dataset/gen-word-3185-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4020 max words - at ./dataset/shuffle-word-4020-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4490 max words - at ./dataset/shuffle-word-4490-count.jsonl\n", - "Generated a single JSONL file with 101 samples (100 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2855 max words - at ./dataset/shuffle-word-2855-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2825 max words - at ./dataset/shuffle-word-2825-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5985 max words - at ./dataset/shuffle-word-5985-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", - "Generated JSONL file with - 3070 max words, 100 samples - at ./dataset/gen-word-3070-count.jsonl\n", - "Generated JSONL file with - 4950 max words, 100 samples - at ./dataset/gen-word-4950-count.jsonl\n", - "Generated JSONL file with - 4120 max words, 100 samples - at ./dataset/gen-word-4120-count.jsonl\n", - "Generated JSONL file with - 3195 max words, 100 samples - at ./dataset/gen-word-3195-count.jsonl\n", - "Generated JSONL file with - 4030 max 
words, 100 samples - at ./dataset/gen-word-4030-count.jsonl\n", - "Generated JSONL file with - 2790 max words, 100 samples - at ./dataset/gen-word-2790-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3035 max words - at ./dataset/shuffle-word-3035-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4380 max words - at ./dataset/shuffle-word-4380-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5365 max words - at ./dataset/shuffle-word-5365-count.jsonl\n", - "Generated JSONL file with - 3315 max words, 100 samples - at ./dataset/gen-word-3315-count.jsonl\n", - "Generated JSONL file with - 3290 max words, 100 samples - at ./dataset/gen-word-3290-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5200 max words - at ./dataset/shuffle-word-5200-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4190 max words - at ./dataset/shuffle-word-4190-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5305 max words - at ./dataset/shuffle-word-5305-count.jsonl\n", - "Generated JSONL file with - 2700 max words, 100 samples - at ./dataset/gen-word-2700-count.jsonl\n", - "Generated JSONL file with - 5780 max words, 100 samples - at ./dataset/gen-word-5780-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3140 max words - at ./dataset/shuffle-word-3140-count.jsonl\n", - "Generated JSONL file with - 5180 max words, 100 samples - at ./dataset/gen-word-5180-count.jsonl\n", - "Generated JSONL file with - 2850 max words, 100 samples - at ./dataset/gen-word-2850-count.jsonl\n", - "Generated JSONL file with - 4590 max words, 100 samples - at ./dataset/gen-word-4590-count.jsonl\n", - "Generated JSONL file with - 5955 max words, 100 samples - at ./dataset/gen-word-5955-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5835 max words - at ./dataset/shuffle-word-5835-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3280 max words - at ./dataset/shuffle-word-3280-count.jsonl\n", - "Generated JSONL file with - 4505 max words, 100 samples - at ./dataset/gen-word-4505-count.jsonl\n", - "Generated JSONL file with - 3135 max words, 100 samples - at ./dataset/gen-word-3135-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4540 max words - at ./dataset/shuffle-word-4540-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4350 max words - at ./dataset/shuffle-word-4350-count.jsonl\n", + "Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", + "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 4125 max words - at ./dataset/shuffle-word-4125-count.jsonl\n", - "Generated JSONL file with - 5320 max words, 100 samples - at ./dataset/gen-word-5320-count.jsonl\n", - "Generated JSONL file with - 3920 max words, 100 samples - at ./dataset/gen-word-3920-count.jsonl\n", - "Generated JSONL file with - 5300 max words, 100 samples - at ./dataset/gen-word-5300-count.jsonl\n", - "Generated JSONL file with - 4940 max words, 100 samples - at 
./dataset/gen-word-4940-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5785 max words - at ./dataset/shuffle-word-5785-count.jsonl\n", - "Generated JSONL file with - 4990 max words, 100 samples - at ./dataset/gen-word-4990-count.jsonl\n", - "Generated JSONL file with - 3360 max words, 100 samples - at ./dataset/gen-word-3360-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3070 max words - at ./dataset/shuffle-word-3070-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3235 max words - at ./dataset/shuffle-word-3235-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4155 max words - at ./dataset/shuffle-word-4155-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4045 max words - at ./dataset/shuffle-word-4045-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3265 max words - at ./dataset/shuffle-word-3265-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3290 max words - at ./dataset/shuffle-word-3290-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2975 max words - at ./dataset/shuffle-word-2975-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4140 max words - at ./dataset/shuffle-word-4140-count.jsonl\n", - "Generated JSONL file with - 4245 max words, 100 samples - at ./dataset/gen-word-4245-count.jsonl\n", - "Generated JSONL file with - 2800 max words, 100 samples - at ./dataset/gen-word-2800-count.jsonl\n", - "Generated JSONL file with - 4250 max words, 100 samples - at ./dataset/gen-word-4250-count.jsonl\n", - "Generated JSONL file with - 3860 max words, 100 samples - at ./dataset/gen-word-3860-count.jsonl\n", - "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", - "Generated JSONL file with - 2980 max words, 100 samples - at ./dataset/gen-word-2980-count.jsonl\n", - "Generated JSONL file with - 3410 max words, 100 samples - at ./dataset/gen-word-3410-count.jsonl\n", - "Generated JSONL file with - 2710 max words, 100 samples - at ./dataset/gen-word-2710-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3990 max words - at ./dataset/shuffle-word-3990-count.jsonl\n", - "Generated JSONL file with - 5690 max words, 100 samples - at ./dataset/gen-word-5690-count.jsonl\n", - "Generated JSONL file with - 3970 max words, 100 samples - at ./dataset/gen-word-3970-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4025 max words - at ./dataset/shuffle-word-4025-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5750 max words - at ./dataset/shuffle-word-5750-count.jsonl\n", + "Generated JSONL file with - 3975 max words, 100 samples - at ./dataset/gen-word-3975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4100 max words - at ./dataset/shuffle-word-4100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4550 max words - at ./dataset/shuffle-word-4550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4200 max words - at ./dataset/shuffle-word-4200-count.jsonl\n", + "Generated JSONL file with - 5175 max words, 100 samples - at ./dataset/gen-word-5175-count.jsonl\n", + "Generated a single JSONL file 
with 100 samples (100 token repeat) - 4725 max words - at ./dataset/shuffle-word-4725-count.jsonl\n", + "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4625 max words - at ./dataset/shuffle-word-4625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4500 max words - at ./dataset/shuffle-word-4500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4450 max words - at ./dataset/shuffle-word-4450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5150 max words - at ./dataset/shuffle-word-5150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4675 max words - at ./dataset/shuffle-word-4675-count.jsonl\n", + "Generated JSONL file with - 4150 max words, 100 samples - at ./dataset/gen-word-4150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4600 max words - at ./dataset/shuffle-word-4600-count.jsonl\n", + "Generated JSONL file with - 4500 max words, 100 samples - at ./dataset/gen-word-4500-count.jsonl\n", + "Generated JSONL file with - 4175 max words, 100 samples - at ./dataset/gen-word-4175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4825 max words - at ./dataset/shuffle-word-4825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5350 max words - at ./dataset/shuffle-word-5350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5475 max words - at ./dataset/shuffle-word-5475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5025 max words - at ./dataset/shuffle-word-5025-count.jsonl\n", + "Generated JSONL file with - 4450 max words, 100 samples - at ./dataset/gen-word-4450-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 4575 max words - at ./dataset/shuffle-word-4575-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5410 max words - at ./dataset/shuffle-word-5410-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4595 max words - at ./dataset/shuffle-word-4595-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5100 max words - at ./dataset/shuffle-word-5100-count.jsonl\n", - "Generated JSONL file with - 2860 max words, 100 samples - at ./dataset/gen-word-2860-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3020 max words - at ./dataset/shuffle-word-3020-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4945 max words - at ./dataset/shuffle-word-4945-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5790 max words - at ./dataset/shuffle-word-5790-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5690 max words - at ./dataset/shuffle-word-5690-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4010 max words - at ./dataset/shuffle-word-4010-count.jsonl\n", - "Generated JSONL file with - 3130 max words, 100 samples - at ./dataset/gen-word-3130-count.jsonl\n", - "Generated JSONL file with - 5555 max words, 100 samples - at ./dataset/gen-word-5555-count.jsonl\n", - "Generated JSONL file with - 3140 max words, 100 
samples - at ./dataset/gen-word-3140-count.jsonl\n", + "Generated JSONL file with - 4375 max words, 100 samples - at ./dataset/gen-word-4375-count.jsonl\n", "Generated JSONL file with - 4350 max words, 100 samples - at ./dataset/gen-word-4350-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4570 max words - at ./dataset/shuffle-word-4570-count.jsonl\n", - "Generated JSONL file with - 2895 max words, 100 samples - at ./dataset/gen-word-2895-count.jsonl\n", - "Generated JSONL file with - 5005 max words, 100 samples - at ./dataset/gen-word-5005-count.jsonl\n", - "Generated JSONL file with - 3465 max words, 100 samples - at ./dataset/gen-word-3465-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5105 max words - at ./dataset/shuffle-word-5105-count.jsonl\n", - "Generated JSONL file with - 2765 max words, 100 samples - at ./dataset/gen-word-2765-count.jsonl\n", - "Generated JSONL file with - 2745 max words, 100 samples - at ./dataset/gen-word-2745-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5465 max words - at ./dataset/shuffle-word-5465-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3010 max words - at ./dataset/shuffle-word-3010-count.jsonl\n", - "Generated JSONL file with - 3900 max words, 100 samples - at ./dataset/gen-word-3900-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4195 max words - at ./dataset/shuffle-word-4195-count.jsonl\n", - "Generated JSONL file with - 5305 max words, 100 samples - at ./dataset/gen-word-5305-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3815 max words - at ./dataset/shuffle-word-3815-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5095 max words - at ./dataset/shuffle-word-5095-count.jsonl\n", - "Generated JSONL file with - 2815 max words, 100 samples - at ./dataset/gen-word-2815-count.jsonl\n", - "Generated JSONL file with - 4525 max words, 100 samples - at ./dataset/gen-word-4525-count.jsonl\n", - "Generated JSONL file with - 3395 max words, 100 samples - at ./dataset/gen-word-3395-count.jsonl\n", - "Generated JSONL file with - 2730 max words, 100 samples - at ./dataset/gen-word-2730-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5215 max words - at ./dataset/shuffle-word-5215-count.jsonl\n", - "Generated JSONL file with - 2820 max words, 100 samples - at ./dataset/gen-word-2820-count.jsonl\n", - "Generated a single JSONL file with 18868 samples (500 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", - "Generated JSONL file with - 3010 max words, 100 samples - at ./dataset/gen-word-3010-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4565 max words - at ./dataset/shuffle-word-4565-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4305 max words - at ./dataset/shuffle-word-4305-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", - "Generated JSONL file with - 4415 max words, 100 samples - at ./dataset/gen-word-4415-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5225 max words - at ./dataset/shuffle-word-5225-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5610 max words 
- at ./dataset/shuffle-word-5610-count.jsonl\n", "Generated JSONL file with - 4225 max words, 100 samples - at ./dataset/gen-word-4225-count.jsonl\n", - "Generated JSONL file with - 4075 max words, 100 samples - at ./dataset/gen-word-4075-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", - "Generated JSONL file with - 2760 max words, 100 samples - at ./dataset/gen-word-2760-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5930 max words - at ./dataset/shuffle-word-5930-count.jsonl\n", - "Generated JSONL file with - 5200 max words, 100 samples - at ./dataset/gen-word-5200-count.jsonl\n", - "Generated JSONL file with - 4270 max words, 100 samples - at ./dataset/gen-word-4270-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4460 max words - at ./dataset/shuffle-word-4460-count.jsonl\n", - "Generated JSONL file with - 5540 max words, 100 samples - at ./dataset/gen-word-5540-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5870 max words - at ./dataset/shuffle-word-5870-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5980 max words - at ./dataset/shuffle-word-5980-count.jsonl\n", - "Generated JSONL file with - 4355 max words, 100 samples - at ./dataset/gen-word-4355-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5710 max words - at ./dataset/shuffle-word-5710-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3920 max words - at ./dataset/shuffle-word-3920-count.jsonl\n", - "Generated JSONL file with - 3915 max words, 100 samples - at ./dataset/gen-word-3915-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", - "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5705 max words - at ./dataset/shuffle-word-5705-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3340 max words - at ./dataset/shuffle-word-3340-count.jsonl\n", - "Generated JSONL file with - 3145 max words, 100 samples - at ./dataset/gen-word-3145-count.jsonl\n", - "Generated JSONL file with - 3870 max words, 100 samples - at ./dataset/gen-word-3870-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4910 max words - at ./dataset/shuffle-word-4910-count.jsonl\n", - "Generated JSONL file with - 2935 max words, 100 samples - at ./dataset/gen-word-2935-count.jsonl\n", - "Generated JSONL file with - 3990 max words, 100 samples - at ./dataset/gen-word-3990-count.jsonl\n", - "Generated JSONL file with - 3995 max words, 100 samples - at ./dataset/gen-word-3995-count.jsonl\n", - "Generated JSONL file with - 3455 max words, 100 samples - at ./dataset/gen-word-3455-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3255 max words - at ./dataset/shuffle-word-3255-count.jsonl\n", - "Generated JSONL file with - 4570 max words, 100 samples - at ./dataset/gen-word-4570-count.jsonl\n", - "Generated JSONL file with - 5590 max words, 100 samples - at ./dataset/gen-word-5590-count.jsonl\n", - "Generated JSONL file with - 5770 max words, 100 samples - at ./dataset/gen-word-5770-count.jsonl\n", - "Generated a 
single JSONL file with 100 samples (100 token repeat) - 5245 max words - at ./dataset/shuffle-word-5245-count.jsonl\n", - "Generated JSONL file with - 5640 max words, 100 samples - at ./dataset/gen-word-5640-count.jsonl\n", - "Generated JSONL file with - 3965 max words, 100 samples - at ./dataset/gen-word-3965-count.jsonl\n", - "Generated JSONL file with - 3435 max words, 100 samples - at ./dataset/gen-word-3435-count.jsonl\n", - "Generated JSONL file with - 3090 max words, 100 samples - at ./dataset/gen-word-3090-count.jsonl\n", - "Generated JSONL file with - 2830 max words, 100 samples - at ./dataset/gen-word-2830-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4585 max words - at ./dataset/shuffle-word-4585-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5175 max words - at ./dataset/shuffle-word-5175-count.jsonl\n", - "Generated JSONL file with - 5400 max words, 100 samples - at ./dataset/gen-word-5400-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3135 max words - at ./dataset/shuffle-word-3135-count.jsonl\n", - "Generated a single JSONL file with 14732 samples (500 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3470 max words - at ./dataset/shuffle-word-3470-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4950 max words - at ./dataset/shuffle-word-4950-count.jsonl\n", + "Generated JSONL file with - 5325 max words, 100 samples - at ./dataset/gen-word-5325-count.jsonl\n", + "Generated JSONL file with - 4575 max words, 100 samples - at ./dataset/gen-word-4575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5100 max words - at ./dataset/shuffle-word-5100-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 4300 max words - at ./dataset/shuffle-word-4300-count.jsonl\n", - "Generated JSONL file with - 3110 max words, 100 samples - at ./dataset/gen-word-3110-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5860 max words - at ./dataset/shuffle-word-5860-count.jsonl\n", - "Generated JSONL file with - 3880 max words, 100 samples - at ./dataset/gen-word-3880-count.jsonl\n", - "Generated JSONL file with - 4105 max words, 100 samples - at ./dataset/gen-word-4105-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5350 max words - at ./dataset/shuffle-word-5350-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", - "Generated JSONL file with - 2845 max words, 100 samples - at ./dataset/gen-word-2845-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3880 max words - at ./dataset/shuffle-word-3880-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3330 max words - at ./dataset/shuffle-word-3330-count.jsonl\n", - "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4965 max words - at ./dataset/shuffle-word-4965-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3905 max words - at ./dataset/shuffle-word-3905-count.jsonl\n", - "Generated JSONL file with - 3345 max words, 100 samples - 
at ./dataset/gen-word-3345-count.jsonl\n", - "Generated JSONL file with - 2810 max words, 100 samples - at ./dataset/gen-word-2810-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5040 max words - at ./dataset/shuffle-word-5040-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5765 max words - at ./dataset/shuffle-word-5765-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3195 max words - at ./dataset/shuffle-word-3195-count.jsonl\n", - "Generated JSONL file with - 2750 max words, 100 samples - at ./dataset/gen-word-2750-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5030 max words - at ./dataset/shuffle-word-5030-count.jsonl\n", - "Generated JSONL file with - 2805 max words, 100 samples - at ./dataset/gen-word-2805-count.jsonl\n", - "Generated JSONL file with - 4630 max words, 100 samples - at ./dataset/gen-word-4630-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5140 max words - at ./dataset/shuffle-word-5140-count.jsonl\n", - "Generated JSONL file with - 2840 max words, 100 samples - at ./dataset/gen-word-2840-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4735 max words - at ./dataset/shuffle-word-4735-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3115 max words - at ./dataset/shuffle-word-3115-count.jsonl\n", - "Generated JSONL file with - 3365 max words, 100 samples - at ./dataset/gen-word-3365-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5395 max words - at ./dataset/shuffle-word-5395-count.jsonl\n", - "Generated JSONL file with - 3210 max words, 100 samples - at ./dataset/gen-word-3210-count.jsonl\n", - "Generated JSONL file with - 3075 max words, 100 samples - at ./dataset/gen-word-3075-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4855 max words - at ./dataset/shuffle-word-4855-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5155 max words - at ./dataset/shuffle-word-5155-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5695 max words - at ./dataset/shuffle-word-5695-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4710 max words - at ./dataset/shuffle-word-4710-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3315 max words - at ./dataset/shuffle-word-3315-count.jsonl\n", - "Generated JSONL file with - 4780 max words, 100 samples - at ./dataset/gen-word-4780-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5345 max words - at ./dataset/shuffle-word-5345-count.jsonl\n", - "Generated JSONL file with - 2835 max words, 100 samples - at ./dataset/gen-word-2835-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4430 max words - at ./dataset/shuffle-word-4430-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4455 max words - at ./dataset/shuffle-word-4455-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3390 max words - at ./dataset/shuffle-word-3390-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3820 max words - at ./dataset/shuffle-word-3820-count.jsonl\n", - "Generated a single JSONL file with 100 
samples (100 token repeat) - 5735 max words - at ./dataset/shuffle-word-5735-count.jsonl\n", - "Generated JSONL file with - 3220 max words, 100 samples - at ./dataset/gen-word-3220-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4060 max words - at ./dataset/shuffle-word-4060-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3465 max words - at ./dataset/shuffle-word-3465-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5505 max words - at ./dataset/shuffle-word-5505-count.jsonl\n", - "Generated JSONL file with - 3960 max words, 100 samples - at ./dataset/gen-word-3960-count.jsonl\n", - "Generated JSONL file with - 5680 max words, 100 samples - at ./dataset/gen-word-5680-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4435 max words - at ./dataset/shuffle-word-4435-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4835 max words - at ./dataset/shuffle-word-4835-count.jsonl\n", + "Generated JSONL file with - 4725 max words, 100 samples - at ./dataset/gen-word-4725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5175 max words - at ./dataset/shuffle-word-5175-count.jsonl\n", "Generated JSONL file with - 4325 max words, 100 samples - at ./dataset/gen-word-4325-count.jsonl\n", - "Generated JSONL file with - 3190 max words, 100 samples - at ./dataset/gen-word-3190-count.jsonl\n", - "Generated JSONL file with - 4275 max words, 100 samples - at ./dataset/gen-word-4275-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5935 max words - at ./dataset/shuffle-word-5935-count.jsonl\n", - "Generated JSONL file with - 2925 max words, 100 samples - at ./dataset/gen-word-2925-count.jsonl\n", - "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3455 max words - at ./dataset/shuffle-word-3455-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5230 max words - at ./dataset/shuffle-word-5230-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3160 max words - at ./dataset/shuffle-word-3160-count.jsonl\n", - "Generated JSONL file with - 4585 max words, 100 samples - at ./dataset/gen-word-4585-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4170 max words - at ./dataset/shuffle-word-4170-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4655 max words - at ./dataset/shuffle-word-4655-count.jsonl\n", - "Generated JSONL file with - 4640 max words, 100 samples - at ./dataset/gen-word-4640-count.jsonl\n", - "Generated JSONL file with - 5345 max words, 100 samples - at ./dataset/gen-word-5345-count.jsonl\n", - "Generated JSONL file with - 2910 max words, 100 samples - at ./dataset/gen-word-2910-count.jsonl\n", - "Generated JSONL file with - 4475 max words, 100 samples - at ./dataset/gen-word-4475-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4180 max words - at ./dataset/shuffle-word-4180-count.jsonl\n", - "Generated JSONL file with - 3940 max words, 100 samples - at ./dataset/gen-word-3940-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5325 max words - at ./dataset/shuffle-word-5325-count.jsonl\n", - "Generated 
a single JSONL file with 100 samples (100 token repeat) - 3165 max words - at ./dataset/shuffle-word-3165-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4230 max words - at ./dataset/shuffle-word-4230-count.jsonl\n", - "Generated JSONL file with - 2995 max words, 100 samples - at ./dataset/gen-word-2995-count.jsonl\n", - "Generated JSONL file with - 4295 max words, 100 samples - at ./dataset/gen-word-4295-count.jsonl\n", - "Generated JSONL file with - 4085 max words, 100 samples - at ./dataset/gen-word-4085-count.jsonl\n", - "Generated JSONL file with - 2945 max words, 100 samples - at ./dataset/gen-word-2945-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5825 max words - at ./dataset/shuffle-word-5825-count.jsonl\n", - "Generated JSONL file with - 3945 max words, 100 samples - at ./dataset/gen-word-3945-count.jsonl\n", - "Generated JSONL file with - 4235 max words, 100 samples - at ./dataset/gen-word-4235-count.jsonl\n", - "Generated JSONL file with - 3240 max words, 100 samples - at ./dataset/gen-word-3240-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4810 max words - at ./dataset/shuffle-word-4810-count.jsonl\n", - "Generated JSONL file with - 5195 max words, 100 samples - at ./dataset/gen-word-5195-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3955 max words - at ./dataset/shuffle-word-3955-count.jsonl\n", - "Generated JSONL file with - 3865 max words, 100 samples - at ./dataset/gen-word-3865-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3320 max words - at ./dataset/shuffle-word-3320-count.jsonl\n", - "Generated JSONL file with - 3035 max words, 100 samples - at ./dataset/gen-word-3035-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4665 max words - at ./dataset/shuffle-word-4665-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4650 max words - at ./dataset/shuffle-word-4650-count.jsonl\n", - "Generated JSONL file with - 4280 max words, 100 samples - at ./dataset/gen-word-4280-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3965 max words - at ./dataset/shuffle-word-3965-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4185 max words - at ./dataset/shuffle-word-4185-count.jsonl\n", - "Generated JSONL file with - 5960 max words, 100 samples - at ./dataset/gen-word-5960-count.jsonl\n", - "Generated JSONL file with - 5090 max words, 100 samples - at ./dataset/gen-word-5090-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3295 max words - at ./dataset/shuffle-word-3295-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4500 max words - at ./dataset/shuffle-word-4500-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4840 max words - at ./dataset/shuffle-word-4840-count.jsonl\n", - "Generated JSONL file with - 3905 max words, 100 samples - at ./dataset/gen-word-3905-count.jsonl\n", - "Generated a single JSONL file with 20382 samples (500 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5065 max words - 
at ./dataset/shuffle-word-5065-count.jsonl\n", - "Generated JSONL file with - 3305 max words, 100 samples - at ./dataset/gen-word-3305-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5235 max words - at ./dataset/shuffle-word-5235-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3335 max words - at ./dataset/shuffle-word-3335-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4510 max words - at ./dataset/shuffle-word-4510-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4745 max words - at ./dataset/shuffle-word-4745-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4050 max words - at ./dataset/shuffle-word-4050-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4065 max words - at ./dataset/shuffle-word-4065-count.jsonl\n", - "Generated JSONL file with - 3165 max words, 100 samples - at ./dataset/gen-word-3165-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5255 max words - at ./dataset/shuffle-word-5255-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5775 max words - at ./dataset/shuffle-word-5775-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3210 max words - at ./dataset/shuffle-word-3210-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", - "Generated JSONL file with - 2990 max words, 100 samples - at ./dataset/gen-word-2990-count.jsonl\n", - "Generated JSONL file with - 4715 max words, 100 samples - at ./dataset/gen-word-4715-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3205 max words - at ./dataset/shuffle-word-3205-count.jsonl\n", - "Generated JSONL file with - 3875 max words, 100 samples - at ./dataset/gen-word-3875-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5495 max words - at ./dataset/shuffle-word-5495-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5500 max words - at ./dataset/shuffle-word-5500-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5660 max words - at ./dataset/shuffle-word-5660-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3995 max words - at ./dataset/shuffle-word-3995-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4150 max words - at ./dataset/shuffle-word-4150-count.jsonl\n", - "Generated JSONL file with - 4210 max words, 100 samples - at ./dataset/gen-word-4210-count.jsonl\n", - "Generated JSONL file with - 4905 max words, 100 samples - at ./dataset/gen-word-4905-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3935 max words - at ./dataset/shuffle-word-3935-count.jsonl\n", - "Generated JSONL file with - 3160 max words, 100 samples - at ./dataset/gen-word-3160-count.jsonl\n", - "Generated JSONL file with - 3955 max words, 100 samples - at ./dataset/gen-word-3955-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5360 max words - at ./dataset/shuffle-word-5360-count.jsonl\n", - "Generated JSONL file with - 3835 max words, 100 samples - at ./dataset/gen-word-3835-count.jsonl\n", - "Generated a single JSONL file with 
100 samples (100 token repeat) - 3270 max words - at ./dataset/shuffle-word-3270-count.jsonl\n", - "Generated JSONL file with - 4330 max words, 100 samples - at ./dataset/gen-word-4330-count.jsonl\n", - "Generated JSONL file with - 3385 max words, 100 samples - at ./dataset/gen-word-3385-count.jsonl\n", - "Generated JSONL file with - 5255 max words, 100 samples - at ./dataset/gen-word-5255-count.jsonl\n", - "Generated JSONL file with - 4300 max words, 100 samples - at ./dataset/gen-word-4300-count.jsonl\n", - "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5520 max words - at ./dataset/shuffle-word-5520-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5285 max words - at ./dataset/shuffle-word-5285-count.jsonl\n", - "Generated JSONL file with - 2970 max words, 100 samples - at ./dataset/gen-word-2970-count.jsonl\n", - "Generated JSONL file with - 5815 max words, 100 samples - at ./dataset/gen-word-5815-count.jsonl\n", - "Generated JSONL file with - 5230 max words, 100 samples - at ./dataset/gen-word-5230-count.jsonl\n", - "Generated JSONL file with - 3935 max words, 100 samples - at ./dataset/gen-word-3935-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5805 max words - at ./dataset/shuffle-word-5805-count.jsonl\n", - "Generated JSONL file with - 4110 max words, 100 samples - at ./dataset/gen-word-4110-count.jsonl\n", - "Generated JSONL file with - 5705 max words, 100 samples - at ./dataset/gen-word-5705-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5205 max words - at ./dataset/shuffle-word-5205-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3460 max words - at ./dataset/shuffle-word-3460-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5570 max words - at ./dataset/shuffle-word-5570-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5475 max words - at ./dataset/shuffle-word-5475-count.jsonl\n", - "Generated JSONL file with - 3080 max words, 100 samples - at ./dataset/gen-word-3080-count.jsonl\n", - "Generated JSONL file with - 2905 max words, 100 samples - at ./dataset/gen-word-2905-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5905 max words - at ./dataset/shuffle-word-5905-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3370 max words - at ./dataset/shuffle-word-3370-count.jsonl\n", - "Generated JSONL file with - 5260 max words, 100 samples - at ./dataset/gen-word-5260-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5910 max words - at ./dataset/shuffle-word-5910-count.jsonl\n", - "Generated a single JSONL file with 21936 samples (500 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", - "Generated JSONL file with - 5580 max words, 100 samples - at ./dataset/gen-word-5580-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5555 max words - at ./dataset/shuffle-word-5555-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5945 max words - at ./dataset/shuffle-word-5945-count.jsonl\n", - 
"Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", - "Generated JSONL file with - 4005 max words, 100 samples - at ./dataset/gen-word-4005-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5390 max words - at ./dataset/shuffle-word-5390-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5795 max words - at ./dataset/shuffle-word-5795-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3445 max words - at ./dataset/shuffle-word-3445-count.jsonl\n", - "Generated JSONL file with - 2965 max words, 100 samples - at ./dataset/gen-word-2965-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5770 max words - at ./dataset/shuffle-word-5770-count.jsonl\n", - "Generated JSONL file with - 4960 max words, 100 samples - at ./dataset/gen-word-4960-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4765 max words - at ./dataset/shuffle-word-4765-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5430 max words - at ./dataset/shuffle-word-5430-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5315 max words - at ./dataset/shuffle-word-5315-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3870 max words - at ./dataset/shuffle-word-3870-count.jsonl\n", - "Generated JSONL file with - 5395 max words, 100 samples - at ./dataset/gen-word-5395-count.jsonl\n", - "Generated JSONL file with - 5675 max words, 100 samples - at ./dataset/gen-word-5675-count.jsonl\n", - "Generated JSONL file with - 4610 max words, 100 samples - at ./dataset/gen-word-4610-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3850 max words - at ./dataset/shuffle-word-3850-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3405 max words - at ./dataset/shuffle-word-3405-count.jsonl\n", - "Generated JSONL file with - 5970 max words, 100 samples - at ./dataset/gen-word-5970-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3440 max words - at ./dataset/shuffle-word-3440-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3960 max words - at ./dataset/shuffle-word-3960-count.jsonl\n", - "Generated JSONL file with - 5225 max words, 100 samples - at ./dataset/gen-word-5225-count.jsonl\n", - "Generated JSONL file with - 4765 max words, 100 samples - at ./dataset/gen-word-4765-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5950 max words - at ./dataset/shuffle-word-5950-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4295 max words - at ./dataset/shuffle-word-4295-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5890 max words - at ./dataset/shuffle-word-5890-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4105 max words - at ./dataset/shuffle-word-4105-count.jsonl\n", - "Generated JSONL file with - 4835 max words, 100 samples - at ./dataset/gen-word-4835-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5400 max words - at ./dataset/shuffle-word-5400-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words 
- at ./dataset/shuffle-word-3450-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5310 max words - at ./dataset/shuffle-word-5310-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3420 max words - at ./dataset/shuffle-word-3420-count.jsonl\n", - "Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", - "Generated JSONL file with - 4700 max words, 100 samples - at ./dataset/gen-word-4700-count.jsonl\n", - "Generated JSONL file with - 4045 max words, 100 samples - at ./dataset/gen-word-4045-count.jsonl\n", - "Generated JSONL file with - 4800 max words, 100 samples - at ./dataset/gen-word-4800-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", - "Generated JSONL file with - 3125 max words, 100 samples - at ./dataset/gen-word-3125-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5330 max words - at ./dataset/shuffle-word-5330-count.jsonl\n", - "Generated JSONL file with - 4215 max words, 100 samples - at ./dataset/gen-word-4215-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3385 max words - at ./dataset/shuffle-word-3385-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", - "Generated JSONL file with - 5330 max words, 100 samples - at ./dataset/gen-word-5330-count.jsonl\n", - "Generated JSONL file with - 3105 max words, 100 samples - at ./dataset/gen-word-3105-count.jsonl\n", - "Generated JSONL file with - 5860 max words, 100 samples - at ./dataset/gen-word-5860-count.jsonl\n", - "Generated JSONL file with - 2960 max words, 100 samples - at ./dataset/gen-word-2960-count.jsonl\n", - "Generated JSONL file with - 5800 max words, 100 samples - at ./dataset/gen-word-5800-count.jsonl\n", - "Generated JSONL file with - 4975 max words, 100 samples - at ./dataset/gen-word-4975-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5480 max words - at ./dataset/shuffle-word-5480-count.jsonl\n", - "Generated JSONL file with - 5940 max words, 100 samples - at ./dataset/gen-word-5940-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5425 max words - at ./dataset/shuffle-word-5425-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4950 max words - at ./dataset/shuffle-word-4950-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4095 max words - at ./dataset/shuffle-word-4095-count.jsonl\n", - "Generated JSONL file with - 3060 max words, 100 samples - at ./dataset/gen-word-3060-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3835 max words - at ./dataset/shuffle-word-3835-count.jsonl\n", - "Generated JSONL file with - 4240 max words, 100 samples - at ./dataset/gen-word-4240-count.jsonl\n", - "Generated JSONL file with - 3020 max words, 100 samples - at ./dataset/gen-word-3020-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4285 max words - at ./dataset/shuffle-word-4285-count.jsonl\n", - "Generated JSONL file with - 3030 max words, 100 samples - at ./dataset/gen-word-3030-count.jsonl\n", - "Generated JSONL file with - 4670 max words, 100 samples - at ./dataset/gen-word-4670-count.jsonl\n", - "Generated 
JSONL file with - 4710 max words, 100 samples - at ./dataset/gen-word-4710-count.jsonl\n", - "Generated JSONL file with - 4510 max words, 100 samples - at ./dataset/gen-word-4510-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4770 max words - at ./dataset/shuffle-word-4770-count.jsonl\n", - "Generated a single JSONL file with 29393 samples (500 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", - "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", - "Generated JSONL file with - 4080 max words, 100 samples - at ./dataset/gen-word-4080-count.jsonl\n", - "Generated JSONL file with - 3310 max words, 100 samples - at ./dataset/gen-word-3310-count.jsonl\n", - "Generated JSONL file with - 3155 max words, 100 samples - at ./dataset/gen-word-3155-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4350 max words - at ./dataset/shuffle-word-4350-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3985 max words - at ./dataset/shuffle-word-3985-count.jsonl\n", - "Generated JSONL file with - 5900 max words, 100 samples - at ./dataset/gen-word-5900-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5895 max words - at ./dataset/shuffle-word-5895-count.jsonl\n", - "Generated JSONL file with - 4315 max words, 100 samples - at ./dataset/gen-word-4315-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4390 max words - at ./dataset/shuffle-word-4390-count.jsonl\n", - "Generated JSONL file with - 4035 max words, 100 samples - at ./dataset/gen-word-4035-count.jsonl\n", - "Generated JSONL file with - 5435 max words, 100 samples - at ./dataset/gen-word-5435-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4875 max words - at ./dataset/shuffle-word-4875-count.jsonl\n", - "Generated JSONL file with - 4455 max words, 100 samples - at ./dataset/gen-word-4455-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4860 max words - at ./dataset/shuffle-word-4860-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4820 max words - at ./dataset/shuffle-word-4820-count.jsonl\n", - "Generated JSONL file with - 3065 max words, 100 samples - at ./dataset/gen-word-3065-count.jsonl\n", - "Generated JSONL file with - 4230 max words, 100 samples - at ./dataset/gen-word-4230-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4465 max words - at ./dataset/shuffle-word-4465-count.jsonl\n", - "Generated JSONL file with - 4390 max words, 100 samples - at ./dataset/gen-word-4390-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5975 max words - at ./dataset/shuffle-word-5975-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4470 max words - at ./dataset/shuffle-word-4470-count.jsonl\n", - "Generated JSONL file with - 5380 max words, 100 samples - at ./dataset/gen-word-5380-count.jsonl\n", - "Generated JSONL file with - 3200 max words, 100 samples - at ./dataset/gen-word-3200-count.jsonl\n", - "Generated JSONL file with - 4500 max words, 100 samples - at ./dataset/gen-word-4500-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4720 max words - at ./dataset/shuffle-word-4720-count.jsonl\n", - "Generated a single JSONL file with 100 
samples (100 token repeat) - 5265 max words - at ./dataset/shuffle-word-5265-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4130 max words - at ./dataset/shuffle-word-4130-count.jsonl\n", - "Generated JSONL file with - 3850 max words, 100 samples - at ./dataset/gen-word-3850-count.jsonl\n", - "Generated JSONL file with - 5980 max words, 100 samples - at ./dataset/gen-word-5980-count.jsonl\n", - "Generated JSONL file with - 4945 max words, 100 samples - at ./dataset/gen-word-4945-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4040 max words - at ./dataset/shuffle-word-4040-count.jsonl\n", - "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5540 max words - at ./dataset/shuffle-word-5540-count.jsonl\n", - "Generated a single JSONL file with 16520 samples (500 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", - "Generated JSONL file with - 5425 max words, 100 samples - at ./dataset/gen-word-5425-count.jsonl\n", - "Generated JSONL file with - 3925 max words, 100 samples - at ./dataset/gen-word-3925-count.jsonl\n", - "Generated JSONL file with - 3255 max words, 100 samples - at ./dataset/gen-word-3255-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4730 max words - at ./dataset/shuffle-word-4730-count.jsonl\n", - "Generated JSONL file with - 5950 max words, 100 samples - at ./dataset/gen-word-5950-count.jsonl\n", - "Generated JSONL file with - 5285 max words, 100 samples - at ./dataset/gen-word-5285-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4175 max words - at ./dataset/shuffle-word-4175-count.jsonl\n", - "Generated JSONL file with - 4485 max words, 100 samples - at ./dataset/gen-word-4485-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4425 max words - at ./dataset/shuffle-word-4425-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 5275 max words - at ./dataset/shuffle-word-5275-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4120 max words - at ./dataset/shuffle-word-4120-count.jsonl\n", - "Generated JSONL file with - 3245 max words, 100 samples - at ./dataset/gen-word-3245-count.jsonl\n", - "Generated JSONL file with - 5550 max words, 100 samples - at ./dataset/gen-word-5550-count.jsonl\n", - "Generated JSONL file with - 3985 max words, 100 samples - at ./dataset/gen-word-3985-count.jsonl\n", - "Generated JSONL file with - 3280 max words, 100 samples - at ./dataset/gen-word-3280-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4520 max words - at ./dataset/shuffle-word-4520-count.jsonl\n", - "Generated JSONL file with - 4015 max words, 100 samples - at ./dataset/gen-word-4015-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5090 max words - at ./dataset/shuffle-word-5090-count.jsonl\n", - "Generated JSONL file with - 4685 max words, 100 samples - at ./dataset/gen-word-4685-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5535 max words - at ./dataset/shuffle-word-5535-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5675 max words - at ./dataset/shuffle-word-5675-count.jsonl\n", - "Generated JSONL file with - 4100 max words, 
100 samples - at ./dataset/gen-word-4100-count.jsonl\n", - "Generated JSONL file with - 5085 max words, 100 samples - at ./dataset/gen-word-5085-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4160 max words - at ./dataset/shuffle-word-4160-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5405 max words - at ./dataset/shuffle-word-5405-count.jsonl\n", - "Generated JSONL file with - 4140 max words, 100 samples - at ./dataset/gen-word-4140-count.jsonl\n", - "Generated JSONL file with - 4625 max words, 100 samples - at ./dataset/gen-word-4625-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4005 max words - at ./dataset/shuffle-word-4005-count.jsonl\n", - "Generated JSONL file with - 3170 max words, 100 samples - at ./dataset/gen-word-3170-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5160 max words - at ./dataset/shuffle-word-5160-count.jsonl\n", - "Generated JSONL file with - 5020 max words, 100 samples - at ./dataset/gen-word-5020-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4370 max words - at ./dataset/shuffle-word-4370-count.jsonl\n", - "Generated JSONL file with - 4010 max words, 100 samples - at ./dataset/gen-word-4010-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4055 max words - at ./dataset/shuffle-word-4055-count.jsonl\n", - "Generated JSONL file with - 3330 max words, 100 samples - at ./dataset/gen-word-3330-count.jsonl\n", - "Generated JSONL file with - 3370 max words, 100 samples - at ./dataset/gen-word-3370-count.jsonl\n", - "Generated JSONL file with - 4130 max words, 100 samples - at ./dataset/gen-word-4130-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4630 max words - at ./dataset/shuffle-word-4630-count.jsonl\n", - "Generated JSONL file with - 3205 max words, 100 samples - at ./dataset/gen-word-3205-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4815 max words - at ./dataset/shuffle-word-4815-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4645 max words - at ./dataset/shuffle-word-4645-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4255 max words - at ./dataset/shuffle-word-4255-count.jsonl\n", - "Generated JSONL file with - 3460 max words, 100 samples - at ./dataset/gen-word-3460-count.jsonl\n", - "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n", - "Generated JSONL file with - 5745 max words, 100 samples - at ./dataset/gen-word-5745-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4690 max words - at ./dataset/shuffle-word-4690-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5525 max words - at ./dataset/shuffle-word-5525-count.jsonl\n", + "Generated JSONL file with - 5025 max words, 100 samples - at ./dataset/gen-word-5025-count.jsonl\n", + "Generated JSONL file with - 4550 max words, 100 samples - at ./dataset/gen-word-4550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5325 max words - at ./dataset/shuffle-word-5325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5050 max words - at ./dataset/shuffle-word-5050-count.jsonl\n", "Generated a single JSONL file with 100 samples 
(100 token repeat) - 4925 max words - at ./dataset/shuffle-word-4925-count.jsonl\n", - "Generated JSONL file with - 3265 max words, 100 samples - at ./dataset/gen-word-3265-count.jsonl\n", - "Generated JSONL file with - 3405 max words, 100 samples - at ./dataset/gen-word-3405-count.jsonl\n", - "Generated JSONL file with - 4770 max words, 100 samples - at ./dataset/gen-word-4770-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5025 max words - at ./dataset/shuffle-word-5025-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4070 max words - at ./dataset/shuffle-word-4070-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4880 max words - at ./dataset/shuffle-word-4880-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5725 max words - at ./dataset/shuffle-word-5725-count.jsonl\n", - "Generated JSONL file with - 5370 max words, 100 samples - at ./dataset/gen-word-5370-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5620 max words - at ./dataset/shuffle-word-5620-count.jsonl\n", - "Generated JSONL file with - 3235 max words, 100 samples - at ./dataset/gen-word-3235-count.jsonl\n", - "Generated JSONL file with - 5360 max words, 100 samples - at ./dataset/gen-word-5360-count.jsonl\n", - "Generated JSONL file with - 5445 max words, 100 samples - at ./dataset/gen-word-5445-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5195 max words - at ./dataset/shuffle-word-5195-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5625 max words - at ./dataset/shuffle-word-5625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5450 max words - at ./dataset/shuffle-word-5450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5550 max words - at ./dataset/shuffle-word-5550-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 4775 max words - at ./dataset/shuffle-word-4775-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5455 max words - at ./dataset/shuffle-word-5455-count.jsonl\n", - "Generated JSONL file with - 4440 max words, 100 samples - at ./dataset/gen-word-4440-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4740 max words - at ./dataset/shuffle-word-4740-count.jsonl\n", - "Generated JSONL file with - 5165 max words, 100 samples - at ./dataset/gen-word-5165-count.jsonl\n", - "Generated JSONL file with - 5650 max words, 100 samples - at ./dataset/gen-word-5650-count.jsonl\n", - "Generated JSONL file with - 5385 max words, 100 samples - at ./dataset/gen-word-5385-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5515 max words - at ./dataset/shuffle-word-5515-count.jsonl\n", - "Generated JSONL file with - 5725 max words, 100 samples - at ./dataset/gen-word-5725-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5300 max words - at ./dataset/shuffle-word-5300-count.jsonl\n", - "Generated JSONL file with - 4145 max words, 100 samples - at ./dataset/gen-word-4145-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5485 max words - at ./dataset/shuffle-word-5485-count.jsonl\n", - "Generated JSONL file with - 5350 max words, 100 samples - at 
./dataset/gen-word-5350-count.jsonl\n", - "Generated JSONL file with - 3320 max words, 100 samples - at ./dataset/gen-word-3320-count.jsonl\n", - "Generated JSONL file with - 3285 max words, 100 samples - at ./dataset/gen-word-3285-count.jsonl\n", - "Generated JSONL file with - 4855 max words, 100 samples - at ./dataset/gen-word-4855-count.jsonl\n", - "Generated JSONL file with - 3335 max words, 100 samples - at ./dataset/gen-word-3335-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4480 max words - at ./dataset/shuffle-word-4480-count.jsonl\n", - "Generated JSONL file with - 5415 max words, 100 samples - at ./dataset/gen-word-5415-count.jsonl\n", - "Generated JSONL file with - 5150 max words, 100 samples - at ./dataset/gen-word-5150-count.jsonl\n", - "Generated JSONL file with - 5485 max words, 100 samples - at ./dataset/gen-word-5485-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 3930 max words - at ./dataset/shuffle-word-3930-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4915 max words - at ./dataset/shuffle-word-4915-count.jsonl\n", - "Generated JSONL file with - 5560 max words, 100 samples - at ./dataset/gen-word-5560-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5815 max words - at ./dataset/shuffle-word-5815-count.jsonl\n", - "Generated JSONL file with - 3295 max words, 100 samples - at ./dataset/gen-word-3295-count.jsonl\n", - "Generated JSONL file with - 5925 max words, 100 samples - at ./dataset/gen-word-5925-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5150 max words - at ./dataset/shuffle-word-5150-count.jsonl\n", - "Generated JSONL file with - 4090 max words, 100 samples - at ./dataset/gen-word-4090-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4890 max words - at ./dataset/shuffle-word-4890-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4535 max words - at ./dataset/shuffle-word-4535-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5420 max words - at ./dataset/shuffle-word-5420-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4145 max words - at ./dataset/shuffle-word-4145-count.jsonl\n", - "Generated JSONL file with - 3380 max words, 100 samples - at ./dataset/gen-word-3380-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4115 max words - at ./dataset/shuffle-word-4115-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4420 max words - at ./dataset/shuffle-word-4420-count.jsonl\n", - "Generated JSONL file with - 5455 max words, 100 samples - at ./dataset/gen-word-5455-count.jsonl\n", - "Generated JSONL file with - 4785 max words, 100 samples - at ./dataset/gen-word-4785-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5600 max words - at ./dataset/shuffle-word-5600-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5145 max words - at ./dataset/shuffle-word-5145-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5335 max words - at ./dataset/shuffle-word-5335-count.jsonl\n", - "Generated JSONL file with - 5430 max words, 100 samples - at ./dataset/gen-word-5430-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token
repeat) - 4685 max words - at ./dataset/shuffle-word-4685-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5460 max words - at ./dataset/shuffle-word-5460-count.jsonl\n", - "Generated JSONL file with - 4065 max words, 100 samples - at ./dataset/gen-word-4065-count.jsonl\n", - "Generated JSONL file with - 5280 max words, 100 samples - at ./dataset/gen-word-5280-count.jsonl\n", - "Generated JSONL file with - 3340 max words, 100 samples - at ./dataset/gen-word-3340-count.jsonl\n", - "Generated JSONL file with - 4220 max words, 100 samples - at ./dataset/gen-word-4220-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5745 max words - at ./dataset/shuffle-word-5745-count.jsonl\n", - "Generated JSONL file with - 4820 max words, 100 samples - at ./dataset/gen-word-4820-count.jsonl\n", - "Generated JSONL file with - 4060 max words, 100 samples - at ./dataset/gen-word-4060-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4830 max words - at ./dataset/shuffle-word-4830-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4805 max words - at ./dataset/shuffle-word-4805-count.jsonl\n", - "Generated JSONL file with - 5695 max words, 100 samples - at ./dataset/gen-word-5695-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5630 max words - at ./dataset/shuffle-word-5630-count.jsonl\n", - "Generated JSONL file with - 4135 max words, 100 samples - at ./dataset/gen-word-4135-count.jsonl\n", - "Generated JSONL file with - 5730 max words, 100 samples - at ./dataset/gen-word-5730-count.jsonl\n", - "Generated JSONL file with - 4745 max words, 100 samples - at ./dataset/gen-word-4745-count.jsonl\n", - "Generated JSONL file with - 4915 max words, 100 samples - at ./dataset/gen-word-4915-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4760 max words - at ./dataset/shuffle-word-4760-count.jsonl\n", - "Generated JSONL file with - 5490 max words, 100 samples - at ./dataset/gen-word-5490-count.jsonl\n", - "Generated JSONL file with - 5585 max words, 100 samples - at ./dataset/gen-word-5585-count.jsonl\n", - "Generated JSONL file with - 4320 max words, 100 samples - at ./dataset/gen-word-4320-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4935 max words - at ./dataset/shuffle-word-4935-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5165 max words - at ./dataset/shuffle-word-5165-count.jsonl\n", - "Generated JSONL file with - 4055 max words, 100 samples - at ./dataset/gen-word-4055-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4980 max words - at ./dataset/shuffle-word-4980-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5220 max words - at ./dataset/shuffle-word-5220-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4525 max words - at ./dataset/shuffle-word-4525-count.jsonl\n", - "Generated JSONL file with - 4985 max words, 100 samples - at ./dataset/gen-word-4985-count.jsonl\n", - "Generated JSONL file with - 4740 max words, 100 samples - at ./dataset/gen-word-4740-count.jsonl\n", - "Generated JSONL file with - 4165 max words, 100 samples - at ./dataset/gen-word-4165-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5295 max words - at
./dataset/shuffle-word-5295-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5135 max words - at ./dataset/shuffle-word-5135-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5510 max words - at ./dataset/shuffle-word-5510-count.jsonl\n", - "Generated JSONL file with - 5635 max words, 100 samples - at ./dataset/gen-word-5635-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5635 max words - at ./dataset/shuffle-word-5635-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5855 max words - at ./dataset/shuffle-word-5855-count.jsonl\n", - "Generated JSONL file with - 3895 max words, 100 samples - at ./dataset/gen-word-3895-count.jsonl\n", - "Generated JSONL file with - 3975 max words, 100 samples - at ./dataset/gen-word-3975-count.jsonl\n", - "Generated JSONL file with - 4180 max words, 100 samples - at ./dataset/gen-word-4180-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5190 max words - at ./dataset/shuffle-word-5190-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5715 max words - at ./dataset/shuffle-word-5715-count.jsonl\n", - "Generated JSONL file with - 3910 max words, 100 samples - at ./dataset/gen-word-3910-count.jsonl\n", - "Generated JSONL file with - 5805 max words, 100 samples - at ./dataset/gen-word-5805-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5680 max words - at ./dataset/shuffle-word-5680-count.jsonl\n", - "Generated JSONL file with - 4185 max words, 100 samples - at ./dataset/gen-word-4185-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5170 max words - at ./dataset/shuffle-word-5170-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4960 max words - at ./dataset/shuffle-word-4960-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5415 max words - at ./dataset/shuffle-word-5415-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5545 max words - at ./dataset/shuffle-word-5545-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4365 max words - at ./dataset/shuffle-word-4365-count.jsonl\n", - "Generated JSONL file with - 4115 max words, 100 samples - at ./dataset/gen-word-4115-count.jsonl\n", - "Generated JSONL file with - 4550 max words, 100 samples - at ./dataset/gen-word-4550-count.jsonl\n", - "Generated JSONL file with - 3980 max words, 100 samples - at ./dataset/gen-word-3980-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5645 max words - at ./dataset/shuffle-word-5645-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4315 max words - at ./dataset/shuffle-word-4315-count.jsonl\n", - "Generated JSONL file with - 4340 max words, 100 samples - at ./dataset/gen-word-4340-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5080 max words - at ./dataset/shuffle-word-5080-count.jsonl\n", - "Generated JSONL file with - 4345 max words, 100 samples - at ./dataset/gen-word-4345-count.jsonl\n", - "Generated JSONL file with - 5050 max words, 100 samples - at ./dataset/gen-word-5050-count.jsonl\n", - "Generated JSONL file with - 4050 max words, 100 samples - at ./dataset/gen-word-4050-count.jsonl\n", - "Generated a single JSONL 
file with 100 samples (100 token repeat) - 4955 max words - at ./dataset/shuffle-word-4955-count.jsonl\n", - "Generated JSONL file with - 4850 max words, 100 samples - at ./dataset/gen-word-4850-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4985 max words - at ./dataset/shuffle-word-4985-count.jsonl\n", - "Generated JSONL file with - 5145 max words, 100 samples - at ./dataset/gen-word-5145-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4845 max words - at ./dataset/shuffle-word-4845-count.jsonl\n", - "Generated JSONL file with - 5760 max words, 100 samples - at ./dataset/gen-word-5760-count.jsonl\n", - "Generated JSONL file with - 4290 max words, 100 samples - at ./dataset/gen-word-4290-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4800 max words - at ./dataset/shuffle-word-4800-count.jsonl\n", + "Generated JSONL file with - 4625 max words, 100 samples - at ./dataset/gen-word-4625-count.jsonl\n", + "Generated JSONL file with - 4650 max words, 100 samples - at ./dataset/gen-word-4650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5125 max words - at ./dataset/shuffle-word-5125-count.jsonl\n", + "Generated JSONL file with - 4125 max words, 100 samples - at ./dataset/gen-word-4125-count.jsonl\n", + "Generated JSONL file with - 4075 max words, 100 samples - at ./dataset/gen-word-4075-count.jsonl\n", + "Generated JSONL file with - 4100 max words, 100 samples - at ./dataset/gen-word-4100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4875 max words - at ./dataset/shuffle-word-4875-count.jsonl\n", + "Generated JSONL file with - 4775 max words, 100 samples - at ./dataset/gen-word-4775-count.jsonl\n", + "Generated JSONL file with - 4275 max words, 100 samples - at ./dataset/gen-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4975 max words - at ./dataset/shuffle-word-4975-count.jsonl\n", + "Generated JSONL file with - 4475 max words, 100 samples - at ./dataset/gen-word-4475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5250 max words - at ./dataset/shuffle-word-5250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5225 max words - at ./dataset/shuffle-word-5225-count.jsonl\n", + "Generated JSONL file with - 4525 max words, 100 samples - at ./dataset/gen-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5725 max words - at ./dataset/shuffle-word-5725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5075 max words - at ./dataset/shuffle-word-5075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4900 max words - at ./dataset/shuffle-word-4900-count.jsonl\n", + "Generated JSONL file with - 4750 max words, 100 samples - at ./dataset/gen-word-4750-count.jsonl\n", + "Generated JSONL file with - 5075 max words, 100 samples - at ./dataset/gen-word-5075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 6000 max words - at ./dataset/shuffle-word-6000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5900 max words - at ./dataset/shuffle-word-5900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5650 max words - at 
./dataset/shuffle-word-5650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4850 max words - at ./dataset/shuffle-word-4850-count.jsonl\n", + "Generated JSONL file with - 4700 max words, 100 samples - at ./dataset/gen-word-4700-count.jsonl\n", + "Generated JSONL file with - 4300 max words, 100 samples - at ./dataset/gen-word-4300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5775 max words - at ./dataset/shuffle-word-5775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5500 max words - at ./dataset/shuffle-word-5500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5200 max words - at ./dataset/shuffle-word-5200-count.jsonl\n", + "Generated JSONL file with - 4825 max words, 100 samples - at ./dataset/gen-word-4825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5400 max words - at ./dataset/shuffle-word-5400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5700 max words - at ./dataset/shuffle-word-5700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5975 max words - at ./dataset/shuffle-word-5975-count.jsonl\n", + "Generated JSONL file with - 4600 max words, 100 samples - at ./dataset/gen-word-4600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5300 max words - at ./dataset/shuffle-word-5300-count.jsonl\n", + "Generated JSONL file with - 4975 max words, 100 samples - at ./dataset/gen-word-4975-count.jsonl\n", + "Generated JSONL file with - 4400 max words, 100 samples - at ./dataset/gen-word-4400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5800 max words - at ./dataset/shuffle-word-5800-count.jsonl\n", + "Generated JSONL file with - 5225 max words, 100 samples - at ./dataset/gen-word-5225-count.jsonl\n", + "Generated JSONL file with - 4900 max words, 100 samples - at ./dataset/gen-word-4900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5000 max words - at ./dataset/shuffle-word-5000-count.jsonl\n", "Generated JSONL file with - 5375 max words, 100 samples - at ./dataset/gen-word-5375-count.jsonl\n", - "Generated JSONL file with - 4965 max words, 100 samples - at ./dataset/gen-word-4965-count.jsonl\n", - "Generated JSONL file with - 5025 max words, 100 samples - at ./dataset/gen-word-5025-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5320 max words - at ./dataset/shuffle-word-5320-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5370 max words - at ./dataset/shuffle-word-5370-count.jsonl\n", - "Generated JSONL file with - 4620 max words, 100 samples - at ./dataset/gen-word-4620-count.jsonl\n", - "Generated JSONL file with - 5530 max words, 100 samples - at ./dataset/gen-word-5530-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5530 max words - at ./dataset/shuffle-word-5530-count.jsonl\n", - "Generated JSONL file with - 5810 max words, 100 samples - at ./dataset/gen-word-5810-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5960 max words - at ./dataset/shuffle-word-5960-count.jsonl\n", - "Generated JSONL file with - 5670 max words, 100 samples - at ./dataset/gen-word-5670-count.jsonl\n", - "Generated JSONL file with - 4380 max words, 100 samples - 
at ./dataset/gen-word-4380-count.jsonl\n", - "Generated JSONL file with - 5535 max words, 100 samples - at ./dataset/gen-word-5535-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5990 max words - at ./dataset/shuffle-word-5990-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5450 max words - at ./dataset/shuffle-word-5450-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4335 max words - at ./dataset/shuffle-word-4335-count.jsonl\n", - "Generated JSONL file with - 5755 max words, 100 samples - at ./dataset/gen-word-5755-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5575 max words - at ./dataset/shuffle-word-5575-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5650 max words - at ./dataset/shuffle-word-5650-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4495 max words - at ./dataset/shuffle-word-4495-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5830 max words - at ./dataset/shuffle-word-5830-count.jsonl\n", - "Generated JSONL file with - 4305 max words, 100 samples - at ./dataset/gen-word-4305-count.jsonl\n", - "Generated JSONL file with - 4160 max words, 100 samples - at ./dataset/gen-word-4160-count.jsonl\n", - "Generated JSONL file with - 5515 max words, 100 samples - at ./dataset/gen-word-5515-count.jsonl\n", - "Generated JSONL file with - 4175 max words, 100 samples - at ./dataset/gen-word-4175-count.jsonl\n", - "Generated JSONL file with - 3930 max words, 100 samples - at ./dataset/gen-word-3930-count.jsonl\n", - "Generated JSONL file with - 4310 max words, 100 samples - at ./dataset/gen-word-4310-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5730 max words - at ./dataset/shuffle-word-5730-count.jsonl\n", - "Generated a single JSONL file with 24028 samples (500 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5440 max words - at ./dataset/shuffle-word-5440-count.jsonl\n", - "Generated JSONL file with - 5175 max words, 100 samples - at ./dataset/gen-word-5175-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4265 max words - at ./dataset/shuffle-word-4265-count.jsonl\n", - "Generated JSONL file with - 4170 max words, 100 samples - at ./dataset/gen-word-4170-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5810 max words - at ./dataset/shuffle-word-5810-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4280 max words - at ./dataset/shuffle-word-4280-count.jsonl\n", - "Generated JSONL file with - 4890 max words, 100 samples - at ./dataset/gen-word-4890-count.jsonl\n", - "Generated JSONL file with - 4980 max words, 100 samples - at ./dataset/gen-word-4980-count.jsonl\n", - "Generated JSONL file with - 4705 max words, 100 samples - at ./dataset/gen-word-4705-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5270 max words - at ./dataset/shuffle-word-5270-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5280 max words - at ./dataset/shuffle-word-5280-count.jsonl\n", + "Generated JSONL file with - 5125 max words, 100 samples - at ./dataset/gen-word-5125-count.jsonl\n", + "Generated JSONL file with - 
5050 max words, 100 samples - at ./dataset/gen-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5600 max words - at ./dataset/shuffle-word-5600-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 5875 max words - at ./dataset/shuffle-word-5875-count.jsonl\n", - "Generated JSONL file with - 5290 max words, 100 samples - at ./dataset/gen-word-5290-count.jsonl\n", - "Generated JSONL file with - 4760 max words, 100 samples - at ./dataset/gen-word-4760-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4340 max words - at ./dataset/shuffle-word-4340-count.jsonl\n", - "Generated JSONL file with - 5420 max words, 100 samples - at ./dataset/gen-word-5420-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4755 max words - at ./dataset/shuffle-word-4755-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4405 max words - at ./dataset/shuffle-word-4405-count.jsonl\n", - "Generated JSONL file with - 5465 max words, 100 samples - at ./dataset/gen-word-5465-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4220 max words - at ./dataset/shuffle-word-4220-count.jsonl\n", - "Generated JSONL file with - 5920 max words, 100 samples - at ./dataset/gen-word-5920-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4550 max words - at ./dataset/shuffle-word-4550-count.jsonl\n", - "Generated JSONL file with - 4895 max words, 100 samples - at ./dataset/gen-word-4895-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5590 max words - at ./dataset/shuffle-word-5590-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4275 max words - at ./dataset/shuffle-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5925 max words - at ./dataset/shuffle-word-5925-count.jsonl\n", + "Generated JSONL file with - 5600 max words, 100 samples - at ./dataset/gen-word-5600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5750 max words - at ./dataset/shuffle-word-5750-count.jsonl\n", + "Generated JSONL file with - 5275 max words, 100 samples - at ./dataset/gen-word-5275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5950 max words - at ./dataset/shuffle-word-5950-count.jsonl\n", + "Generated JSONL file with - 5300 max words, 100 samples - at ./dataset/gen-word-5300-count.jsonl\n", + "Generated JSONL file with - 4875 max words, 100 samples - at ./dataset/gen-word-4875-count.jsonl\n", + "Generated JSONL file with - 5675 max words, 100 samples - at ./dataset/gen-word-5675-count.jsonl\n", "Generated a single JSONL file with 100 samples (100 token repeat) - 5850 max words - at ./dataset/shuffle-word-5850-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4515 max words - at ./dataset/shuffle-word-4515-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4530 max words - at ./dataset/shuffle-word-4530-count.jsonl\n", - "Generated JSONL file with - 5205 max words, 100 samples - at ./dataset/gen-word-5205-count.jsonl\n", - "Generated JSONL file with - 5575 max words, 100 samples - at ./dataset/gen-word-5575-count.jsonl\n", - "Generated JSONL file with - 5250 max words, 100 samples - at ./dataset/gen-word-5250-count.jsonl\n", - 
"Generated JSONL file with - 4150 max words, 100 samples - at ./dataset/gen-word-4150-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5070 max words - at ./dataset/shuffle-word-5070-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4205 max words - at ./dataset/shuffle-word-4205-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4800 max words - at ./dataset/shuffle-word-4800-count.jsonl\n", + "Generated JSONL file with - 5100 max words, 100 samples - at ./dataset/gen-word-5100-count.jsonl\n", + "Generated JSONL file with - 5925 max words, 100 samples - at ./dataset/gen-word-5925-count.jsonl\n", + "Generated JSONL file with - 5400 max words, 100 samples - at ./dataset/gen-word-5400-count.jsonl\n", + "Generated JSONL file with - 5850 max words, 100 samples - at ./dataset/gen-word-5850-count.jsonl\n", "Generated JSONL file with - 5000 max words, 100 samples - at ./dataset/gen-word-5000-count.jsonl\n", - "Generated JSONL file with - 4790 max words, 100 samples - at ./dataset/gen-word-4790-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5865 max words - at ./dataset/shuffle-word-5865-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4270 max words - at ./dataset/shuffle-word-4270-count.jsonl\n", - "Generated JSONL file with - 5790 max words, 100 samples - at ./dataset/gen-word-5790-count.jsonl\n", - "Generated JSONL file with - 5665 max words, 100 samples - at ./dataset/gen-word-5665-count.jsonl\n", - "Generated JSONL file with - 4555 max words, 100 samples - at ./dataset/gen-word-4555-count.jsonl\n", - "Generated JSONL file with - 5840 max words, 100 samples - at ./dataset/gen-word-5840-count.jsonl\n", - "Generated JSONL file with - 4680 max words, 100 samples - at ./dataset/gen-word-4680-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4325 max words - at ./dataset/shuffle-word-4325-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4260 max words - at ./dataset/shuffle-word-4260-count.jsonl\n", - "Generated JSONL file with - 5390 max words, 100 samples - at ./dataset/gen-word-5390-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5720 max words - at ./dataset/shuffle-word-5720-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5940 max words - at ./dataset/shuffle-word-5940-count.jsonl\n", - "Generated JSONL file with - 5275 max words, 100 samples - at ./dataset/gen-word-5275-count.jsonl\n", - "Generated JSONL file with - 5645 max words, 100 samples - at ./dataset/gen-word-5645-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4990 max words - at ./dataset/shuffle-word-4990-count.jsonl\n", - "Generated JSONL file with - 4265 max words, 100 samples - at ./dataset/gen-word-4265-count.jsonl\n", - "Generated JSONL file with - 5215 max words, 100 samples - at ./dataset/gen-word-5215-count.jsonl\n", - "Generated JSONL file with - 5095 max words, 100 samples - at ./dataset/gen-word-5095-count.jsonl\n", - "Generated JSONL file with - 5930 max words, 100 samples - at ./dataset/gen-word-5930-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4600 max words - at ./dataset/shuffle-word-4600-count.jsonl\n", - "Generated JSONL file with - 5765 max words, 100 samples - at 
./dataset/gen-word-5765-count.jsonl\n", - "Generated JSONL file with - 4815 max words, 100 samples - at ./dataset/gen-word-4815-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5925 max words - at ./dataset/shuffle-word-5925-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4320 max words - at ./dataset/shuffle-word-4320-count.jsonl\n", - "Generated JSONL file with - 5715 max words, 100 samples - at ./dataset/gen-word-5715-count.jsonl\n", - "Generated JSONL file with - 5355 max words, 100 samples - at ./dataset/gen-word-5355-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4215 max words - at ./dataset/shuffle-word-4215-count.jsonl\n", "Generated JSONL file with - 5700 max words, 100 samples - at ./dataset/gen-word-5700-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5595 max words - at ./dataset/shuffle-word-5595-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5435 max words - at ./dataset/shuffle-word-5435-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4970 max words - at ./dataset/shuffle-word-4970-count.jsonl\n", - "Generated JSONL file with - 5835 max words, 100 samples - at ./dataset/gen-word-5835-count.jsonl\n", - "Generated JSONL file with - 4690 max words, 100 samples - at ./dataset/gen-word-4690-count.jsonl\n", - "Generated JSONL file with - 4635 max words, 100 samples - at ./dataset/gen-word-4635-count.jsonl\n", - "Generated JSONL file with - 4655 max words, 100 samples - at ./dataset/gen-word-4655-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4505 max words - at ./dataset/shuffle-word-4505-count.jsonl\n", - "Generated JSONL file with - 5495 max words, 100 samples - at ./dataset/gen-word-5495-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4640 max words - at ./dataset/shuffle-word-4640-count.jsonl\n", - "Generated JSONL file with - 5070 max words, 100 samples - at ./dataset/gen-word-5070-count.jsonl\n", - "Generated JSONL file with - 5610 max words, 100 samples - at ./dataset/gen-word-5610-count.jsonl\n", - "Generated JSONL file with - 5210 max words, 100 samples - at ./dataset/gen-word-5210-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4940 max words - at ./dataset/shuffle-word-4940-count.jsonl\n", - "Generated JSONL file with - 4465 max words, 100 samples - at ./dataset/gen-word-4465-count.jsonl\n", - "Generated JSONL file with - 5160 max words, 100 samples - at ./dataset/gen-word-5160-count.jsonl\n", - "Generated JSONL file with - 5170 max words, 100 samples - at ./dataset/gen-word-5170-count.jsonl\n", - "Generated JSONL file with - 5820 max words, 100 samples - at ./dataset/gen-word-5820-count.jsonl\n", - "Generated JSONL file with - 4865 max words, 100 samples - at ./dataset/gen-word-4865-count.jsonl\n", + "Generated JSONL file with - 5450 max words, 100 samples - at ./dataset/gen-word-5450-count.jsonl\n", + "Generated JSONL file with - 4800 max words, 100 samples - at ./dataset/gen-word-4800-count.jsonl\n", + "Generated JSONL file with - 5200 max words, 100 samples - at ./dataset/gen-word-5200-count.jsonl\n", + "Generated JSONL file with - 5625 max words, 100 samples - at ./dataset/gen-word-5625-count.jsonl\n", "Generated JSONL file with - 5875 max words, 100 samples - at ./dataset/gen-word-5875-count.jsonl\n", - "Generated a 
single JSONL file with 100 samples (100 token repeat) - 4620 max words - at ./dataset/shuffle-word-4620-count.jsonl\n", - "Generated JSONL file with - 5245 max words, 100 samples - at ./dataset/gen-word-5245-count.jsonl\n", - "Generated JSONL file with - 5310 max words, 100 samples - at ./dataset/gen-word-5310-count.jsonl\n", - "Generated JSONL file with - 5865 max words, 100 samples - at ./dataset/gen-word-5865-count.jsonl\n", - "Generated JSONL file with - 5510 max words, 100 samples - at ./dataset/gen-word-5510-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4625 max words - at ./dataset/shuffle-word-4625-count.jsonl\n", - "Generated JSONL file with - 5990 max words, 100 samples - at ./dataset/gen-word-5990-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4425 max words - at ./dataset/shuffle-word-4425-count.jsonl\n", - "Generated JSONL file with - 4425 max words, 100 samples - at ./dataset/gen-word-4425-count.jsonl\n", - "Generated JSONL file with - 5795 max words, 100 samples - at ./dataset/gen-word-5795-count.jsonl\n", - "Generated JSONL file with - 5905 max words, 100 samples - at ./dataset/gen-word-5905-count.jsonl\n", - "Generated JSONL file with - 5720 max words, 100 samples - at ./dataset/gen-word-5720-count.jsonl\n", - "Generated JSONL file with - 4255 max words, 100 samples - at ./dataset/gen-word-4255-count.jsonl\n", - "Generated JSONL file with - 5620 max words, 100 samples - at ./dataset/gen-word-5620-count.jsonl\n", - "Generated JSONL file with - 5995 max words, 100 samples - at ./dataset/gen-word-5995-count.jsonl\n", - "Generated JSONL file with - 4830 max words, 100 samples - at ./dataset/gen-word-4830-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4450 max words - at ./dataset/shuffle-word-4450-count.jsonl\n", - "Generated JSONL file with - 5975 max words, 100 samples - at ./dataset/gen-word-5975-count.jsonl\n", - "Generated JSONL file with - 4730 max words, 100 samples - at ./dataset/gen-word-4730-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5655 max words - at ./dataset/shuffle-word-5655-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5615 max words - at ./dataset/shuffle-word-5615-count.jsonl\n", - "Generated JSONL file with - 5750 max words, 100 samples - at ./dataset/gen-word-5750-count.jsonl\n", - "Generated JSONL file with - 5460 max words, 100 samples - at ./dataset/gen-word-5460-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4995 max words - at ./dataset/shuffle-word-4995-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4555 max words - at ./dataset/shuffle-word-4555-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4900 max words - at ./dataset/shuffle-word-4900-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4825 max words - at ./dataset/shuffle-word-4825-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4905 max words - at ./dataset/shuffle-word-4905-count.jsonl\n", - "Generated JSONL file with - 5605 max words, 100 samples - at ./dataset/gen-word-5605-count.jsonl\n", - "Generated JSONL file with - 5440 max words, 100 samples - at ./dataset/gen-word-5440-count.jsonl\n", - "Generated JSONL file with - 5325 max words, 100 samples - at ./dataset/gen-word-5325-count.jsonl\n", -
"Generated a single JSONL file with 100 samples (100 token repeat) - 5580 max words - at ./dataset/shuffle-word-5580-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5640 max words - at ./dataset/shuffle-word-5640-count.jsonl\n", - "Generated JSONL file with - 5295 max words, 100 samples - at ./dataset/gen-word-5295-count.jsonl\n", - "Generated JSONL file with - 4845 max words, 100 samples - at ./dataset/gen-word-4845-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5995 max words - at ./dataset/shuffle-word-5995-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5885 max words - at ./dataset/shuffle-word-5885-count.jsonl\n", - "Generated JSONL file with - 5735 max words, 100 samples - at ./dataset/gen-word-5735-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5915 max words - at ./dataset/shuffle-word-5915-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5740 max words - at ./dataset/shuffle-word-5740-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4895 max words - at ./dataset/shuffle-word-4895-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 4660 max words - at ./dataset/shuffle-word-4660-count.jsonl\n", - "Generated JSONL file with - 5405 max words, 100 samples - at ./dataset/gen-word-5405-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 6000 max words - at ./dataset/shuffle-word-6000-count.jsonl\n", - "Generated JSONL file with - 5045 max words, 100 samples - at ./dataset/gen-word-5045-count.jsonl\n", - "Generated JSONL file with - 5520 max words, 100 samples - at ./dataset/gen-word-5520-count.jsonl\n", - "Generated JSONL file with - 4885 max words, 100 samples - at ./dataset/gen-word-4885-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5525 max words - at ./dataset/shuffle-word-5525-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5955 max words - at ./dataset/shuffle-word-5955-count.jsonl\n", - "Generated JSONL file with - 5365 max words, 100 samples - at ./dataset/gen-word-5365-count.jsonl\n", - "Generated JSONL file with - 4955 max words, 100 samples - at ./dataset/gen-word-4955-count.jsonl\n", - "Generated JSONL file with - 5065 max words, 100 samples - at ./dataset/gen-word-5065-count.jsonl\n", - "Generated JSONL file with - 5910 max words, 100 samples - at ./dataset/gen-word-5910-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5755 max words - at ./dataset/shuffle-word-5755-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5820 max words - at ./dataset/shuffle-word-5820-count.jsonl\n", - "Generated JSONL file with - 5340 max words, 100 samples - at ./dataset/gen-word-5340-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5625 max words - at ./dataset/shuffle-word-5625-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5585 max words - at ./dataset/shuffle-word-5585-count.jsonl\n", + "Generated JSONL file with - 5800 max words, 100 samples - at ./dataset/gen-word-5800-count.jsonl\n", + "Generated JSONL file with - 5900 max words, 100 samples - at ./dataset/gen-word-5900-count.jsonl\n", + "Generated JSONL file with - 5150 max words, 100 samples - at 
./dataset/gen-word-5150-count.jsonl\n", "Generated JSONL file with - 6000 max words, 100 samples - at ./dataset/gen-word-6000-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5685 max words - at ./dataset/shuffle-word-5685-count.jsonl\n", - "Generated JSONL file with - 4735 max words, 100 samples - at ./dataset/gen-word-4735-count.jsonl\n", - "Generated JSONL file with - 4575 max words, 100 samples - at ./dataset/gen-word-4575-count.jsonl\n", - "Generated JSONL file with - 4480 max words, 100 samples - at ./dataset/gen-word-4480-count.jsonl\n", - "Generated JSONL file with - 4495 max words, 100 samples - at ./dataset/gen-word-4495-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5900 max words - at ./dataset/shuffle-word-5900-count.jsonl\n", - "Generated JSONL file with - 4970 max words, 100 samples - at ./dataset/gen-word-4970-count.jsonl\n", - "Generated JSONL file with - 5220 max words, 100 samples - at ./dataset/gen-word-5220-count.jsonl\n", - "Generated JSONL file with - 4595 max words, 100 samples - at ./dataset/gen-word-4595-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5840 max words - at ./dataset/shuffle-word-5840-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5920 max words - at ./dataset/shuffle-word-5920-count.jsonl\n", - "Generated JSONL file with - 5265 max words, 100 samples - at ./dataset/gen-word-5265-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5260 max words - at ./dataset/shuffle-word-5260-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5800 max words - at ./dataset/shuffle-word-5800-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5700 max words - at ./dataset/shuffle-word-5700-count.jsonl\n", - "Generated JSONL file with - 4725 max words, 100 samples - at ./dataset/gen-word-4725-count.jsonl\n", - "Generated JSONL file with - 5915 max words, 100 samples - at ./dataset/gen-word-5915-count.jsonl\n", - "Generated JSONL file with - 5030 max words, 100 samples - at ./dataset/gen-word-5030-count.jsonl\n", - "Generated JSONL file with - 4530 max words, 100 samples - at ./dataset/gen-word-4530-count.jsonl\n", - "Generated JSONL file with - 4825 max words, 100 samples - at ./dataset/gen-word-4825-count.jsonl\n", - "Generated JSONL file with - 5830 max words, 100 samples - at ./dataset/gen-word-5830-count.jsonl\n", - "Generated JSONL file with - 5475 max words, 100 samples - at ./dataset/gen-word-5475-count.jsonl\n", - "Generated a single JSONL file with 100 samples (100 token repeat) - 5780 max words - at ./dataset/shuffle-word-5780-count.jsonl\n", - "Generated JSONL file with - 4775 max words, 100 samples - at ./dataset/gen-word-4775-count.jsonl\n", - "Generated JSONL file with - 5945 max words, 100 samples - at ./dataset/gen-word-5945-count.jsonl\n", - "Generated JSONL file with - 5500 max words, 100 samples - at ./dataset/gen-word-5500-count.jsonl\n", - "Generated JSONL file with - 5595 max words, 100 samples - at ./dataset/gen-word-5595-count.jsonl\n", - "Generated JSONL file with - 5630 max words, 100 samples - at ./dataset/gen-word-5630-count.jsonl\n", - "Generated JSONL file with - 4430 max words, 100 samples - at ./dataset/gen-word-4430-count.jsonl\n", - "Generated JSONL file with - 5710 max words, 100 samples - at ./dataset/gen-word-5710-count.jsonl\n", - "Generated JSONL file with - 5135 max 
words, 100 samples - at ./dataset/gen-word-5135-count.jsonl\n", - "Generated JSONL file with - 5235 max words, 100 samples - at ./dataset/gen-word-5235-count.jsonl\n", - "Generated JSONL file with - 4460 max words, 100 samples - at ./dataset/gen-word-4460-count.jsonl\n", - "Generated JSONL file with - 4805 max words, 100 samples - at ./dataset/gen-word-4805-count.jsonl\n", - "Generated JSONL file with - 4870 max words, 100 samples - at ./dataset/gen-word-4870-count.jsonl\n", - "Generated JSONL file with - 4450 max words, 100 samples - at ./dataset/gen-word-4450-count.jsonl\n", - "Generated JSONL file with - 4795 max words, 100 samples - at ./dataset/gen-word-4795-count.jsonl\n", - "Generated JSONL file with - 4470 max words, 100 samples - at ./dataset/gen-word-4470-count.jsonl\n", - "Generated JSONL file with - 4360 max words, 100 samples - at ./dataset/gen-word-4360-count.jsonl\n", - "Generated JSONL file with - 5785 max words, 100 samples - at ./dataset/gen-word-5785-count.jsonl\n", - "Generated JSONL file with - 5410 max words, 100 samples - at ./dataset/gen-word-5410-count.jsonl\n", - "Generated JSONL file with - 4435 max words, 100 samples - at ./dataset/gen-word-4435-count.jsonl\n", - "Generated JSONL file with - 5660 max words, 100 samples - at ./dataset/gen-word-5660-count.jsonl\n", - "Generated JSONL file with - 5655 max words, 100 samples - at ./dataset/gen-word-5655-count.jsonl\n", - "Generated JSONL file with - 4720 max words, 100 samples - at ./dataset/gen-word-4720-count.jsonl\n", - "Generated JSONL file with - 4540 max words, 100 samples - at ./dataset/gen-word-4540-count.jsonl\n", - "Generated JSONL file with - 5740 max words, 100 samples - at ./dataset/gen-word-5740-count.jsonl\n", - "Generated JSONL file with - 4410 max words, 100 samples - at ./dataset/gen-word-4410-count.jsonl\n", + "Generated JSONL file with - 5575 max words, 100 samples - at ./dataset/gen-word-5575-count.jsonl\n", + "Generated JSONL file with - 5775 max words, 100 samples - at ./dataset/gen-word-5775-count.jsonl\n", + "Generated JSONL file with - 5725 max words, 100 samples - at ./dataset/gen-word-5725-count.jsonl\n", "Generated JSONL file with - 4925 max words, 100 samples - at ./dataset/gen-word-4925-count.jsonl\n", - "Generated JSONL file with - 4810 max words, 100 samples - at ./dataset/gen-word-4810-count.jsonl\n", - "Generated JSONL file with - 4750 max words, 100 samples - at ./dataset/gen-word-4750-count.jsonl\n", - "Generated JSONL file with - 5075 max words, 100 samples - at ./dataset/gen-word-5075-count.jsonl\n", - "Generated JSONL file with - 5890 max words, 100 samples - at ./dataset/gen-word-5890-count.jsonl\n", - "Generated JSONL file with - 4520 max words, 100 samples - at ./dataset/gen-word-4520-count.jsonl\n", - "Generated JSONL file with - 5505 max words, 100 samples - at ./dataset/gen-word-5505-count.jsonl\n", - "Generated JSONL file with - 5685 max words, 100 samples - at ./dataset/gen-word-5685-count.jsonl\n", - "Generated JSONL file with - 5935 max words, 100 samples - at ./dataset/gen-word-5935-count.jsonl\n", - "Generated JSONL file with - 4880 max words, 100 samples - at ./dataset/gen-word-4880-count.jsonl\n", - "Generated JSONL file with - 5105 max words, 100 samples - at ./dataset/gen-word-5105-count.jsonl\n", - "Generated JSONL file with - 5480 max words, 100 samples - at ./dataset/gen-word-5480-count.jsonl\n", - "Generated JSONL file with - 5985 max words, 100 samples - at ./dataset/gen-word-5985-count.jsonl\n", - "Generated JSONL file with - 5825 max words, 100 samples - 
at ./dataset/gen-word-5825-count.jsonl\n", - "Generated JSONL file with - 5545 max words, 100 samples - at ./dataset/gen-word-5545-count.jsonl\n", - "Generated JSONL file with - 5870 max words, 100 samples - at ./dataset/gen-word-5870-count.jsonl\n", - "Generated a single JSONL file with 279154 samples (500 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", + "Generated JSONL file with - 5525 max words, 100 samples - at ./dataset/gen-word-5525-count.jsonl\n", + "Generated JSONL file with - 5425 max words, 100 samples - at ./dataset/gen-word-5425-count.jsonl\n", + "Generated JSONL file with - 5950 max words, 100 samples - at ./dataset/gen-word-5950-count.jsonl\n", + "Generated JSONL file with - 5750 max words, 100 samples - at ./dataset/gen-word-5750-count.jsonl\n", + "Generated JSONL file with - 5500 max words, 100 samples - at ./dataset/gen-word-5500-count.jsonl\n", + "Generated JSONL file with - 5975 max words, 100 samples - at ./dataset/gen-word-5975-count.jsonl\n", "## Done ##\n", - "total 6.2G\n", - "drwxrwxr-x 2 recursal recursal 132K Jan 22 23:49 .\n", - "drwxrwxr-x 5 recursal recursal 4.0K Jan 22 23:09 ..\n", - "-rw-rw-r-- 1 recursal recursal 495K Jan 22 23:49 gen-word-1000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 496K Jan 22 23:49 gen-word-1005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 530K Jan 22 23:49 gen-word-100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 500K Jan 22 23:49 gen-word-1010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 503K Jan 22 23:49 gen-word-1015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 497K Jan 22 23:49 gen-word-1020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 504K Jan 22 23:49 gen-word-1025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 508K Jan 22 23:49 gen-word-1030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 513K Jan 22 23:49 gen-word-1035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 510K Jan 22 23:49 gen-word-1040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 510K Jan 22 23:49 gen-word-1045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 516K Jan 22 23:49 gen-word-1050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 516K Jan 22 23:49 gen-word-1055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 55K Jan 22 23:49 gen-word-105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 522K Jan 22 23:49 gen-word-1060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 516K Jan 22 23:49 gen-word-1065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 524K Jan 22 23:49 gen-word-1070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 533K Jan 22 23:49 gen-word-1075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 538K Jan 22 23:49 gen-word-1080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 533K Jan 22 23:49 gen-word-1085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 532K Jan 22 23:49 gen-word-1090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 536K Jan 22 23:49 gen-word-1095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 99K Jan 22 23:49 gen-word-10-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 546K Jan 22 23:49 gen-word-1100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 544K Jan 22 23:49 gen-word-1105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 58K Jan 22 23:49 gen-word-110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 536K Jan 22 23:49 gen-word-1110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 544K Jan 22 23:49 gen-word-1115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 554K Jan 22 23:49 gen-word-1120-count.jsonl\n", - "-rw-rw-r-- 1 recursal 
recursal 551K Jan 22 23:49 gen-word-1125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 553K Jan 22 23:49 gen-word-1130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 555K Jan 22 23:49 gen-word-1135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 554K Jan 22 23:49 gen-word-1140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 563K Jan 22 23:49 gen-word-1145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 562K Jan 22 23:49 gen-word-1150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 561K Jan 22 23:49 gen-word-1155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 61K Jan 22 23:49 gen-word-115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 577K Jan 22 23:49 gen-word-1160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 576K Jan 22 23:49 gen-word-1165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 571K Jan 22 23:49 gen-word-1170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 573K Jan 22 23:49 gen-word-1175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 581K Jan 22 23:49 gen-word-1180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 572K Jan 22 23:49 gen-word-1185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 585K Jan 22 23:49 gen-word-1190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 581K Jan 22 23:49 gen-word-1195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 595K Jan 22 23:49 gen-word-1200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 596K Jan 22 23:49 gen-word-1205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 63K Jan 22 23:49 gen-word-120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 589K Jan 22 23:49 gen-word-1210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 601K Jan 22 23:49 gen-word-1215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 604K Jan 22 23:49 gen-word-1220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 605K Jan 22 23:49 gen-word-1225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 601K Jan 22 23:49 gen-word-1230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 614K Jan 22 23:49 gen-word-1235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 608K Jan 22 23:49 gen-word-1240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 609K Jan 22 23:49 gen-word-1245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 613K Jan 22 23:49 gen-word-1250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 612K Jan 22 23:49 gen-word-1255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 65K Jan 22 23:49 gen-word-125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 626K Jan 22 23:49 gen-word-1260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 616K Jan 22 23:49 gen-word-1265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 627K Jan 22 23:49 gen-word-1270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 627K Jan 22 23:49 gen-word-1275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 630K Jan 22 23:49 gen-word-1280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 631K Jan 22 23:49 gen-word-1285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 642K Jan 22 23:49 gen-word-1290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 636K Jan 22 23:49 gen-word-1295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 635K Jan 22 23:49 gen-word-1300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 635K Jan 22 23:49 gen-word-1305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 70K Jan 22 23:49 gen-word-130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 634K Jan 22 23:49 gen-word-1310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 639K Jan 22 23:49 gen-word-1315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 645K Jan 22 23:49 
gen-word-1320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 655K Jan 22 23:49 gen-word-1325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 654K Jan 22 23:49 gen-word-1330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 654K Jan 22 23:49 gen-word-1335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 656K Jan 22 23:49 gen-word-1340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 657K Jan 22 23:49 gen-word-1345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 661K Jan 22 23:49 gen-word-1350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 663K Jan 22 23:49 gen-word-1355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 72K Jan 22 23:49 gen-word-135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 671K Jan 22 23:49 gen-word-1360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 669K Jan 22 23:49 gen-word-1365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 674K Jan 22 23:49 gen-word-1370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 674K Jan 22 23:49 gen-word-1375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 676K Jan 22 23:49 gen-word-1380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 690K Jan 22 23:49 gen-word-1385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 683K Jan 22 23:49 gen-word-1390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 686K Jan 22 23:49 gen-word-1395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 675K Jan 22 23:49 gen-word-1400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 685K Jan 22 23:49 gen-word-1405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 75K Jan 22 23:49 gen-word-140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 687K Jan 22 23:49 gen-word-1410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 693K Jan 22 23:49 gen-word-1415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 693K Jan 22 23:49 gen-word-1420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 697K Jan 22 23:49 gen-word-1425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 700K Jan 22 23:49 gen-word-1430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 704K Jan 22 23:49 gen-word-1435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 707K Jan 22 23:49 gen-word-1440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 702K Jan 22 23:49 gen-word-1445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 712K Jan 22 23:49 gen-word-1450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 716K Jan 22 23:49 gen-word-1455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 74K Jan 22 23:49 gen-word-145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 716K Jan 22 23:49 gen-word-1460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 720K Jan 22 23:49 gen-word-1465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 715K Jan 22 23:49 gen-word-1470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 711K Jan 22 23:49 gen-word-1475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 726K Jan 22 23:49 gen-word-1480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 728K Jan 22 23:49 gen-word-1485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 731K Jan 22 23:49 gen-word-1490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 737K Jan 22 23:49 gen-word-1495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 735K Jan 22 23:49 gen-word-1500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 80K Jan 22 23:49 gen-word-150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1515-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 80K Jan 22 23:49 gen-word-155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.5M Jan 22 23:49 gen-word-1565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 120K Jan 22 23:49 gen-word-15-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 84K Jan 22 23:49 gen-word-160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 88K Jan 22 23:49 gen-word-165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.6M Jan 22 23:49 gen-word-1685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 86K Jan 22 23:49 gen-word-170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M 
Jan 22 23:49 gen-word-1710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 88K Jan 22 23:49 gen-word-175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.7M Jan 22 23:49 gen-word-1785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 92K Jan 22 23:49 gen-word-180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 95K Jan 22 23:49 gen-word-185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.8M Jan 22 23:49 gen-word-1885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 97K Jan 22 23:49 
gen-word-190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 100K Jan 22 23:49 gen-word-195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-1990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.9M Jan 22 23:49 gen-word-1995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 105K Jan 22 23:49 gen-word-200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 108K Jan 22 23:49 gen-word-205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.0M Jan 22 23:49 gen-word-2090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 147K Jan 22 23:49 gen-word-20-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2100-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 109K Jan 22 23:49 gen-word-210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 111K Jan 22 23:49 gen-word-215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.1M Jan 22 23:49 gen-word-2195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 112K Jan 22 23:49 gen-word-220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 112K Jan 22 23:49 gen-word-225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
2.2M Jan 22 23:49 gen-word-2300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 120K Jan 22 23:49 gen-word-230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.2M Jan 22 23:49 gen-word-2325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 122K Jan 22 23:49 gen-word-235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 119K Jan 22 23:49 gen-word-240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.3M Jan 22 23:49 gen-word-2435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 123K Jan 22 23:49 gen-word-245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 
gen-word-2495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 128K Jan 22 23:49 gen-word-250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.4M Jan 22 23:49 gen-word-2510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 129K Jan 22 23:49 gen-word-255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 168K Jan 22 23:49 gen-word-25-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 128K Jan 22 23:49 gen-word-260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 gen-word-2630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 133K Jan 22 23:49 gen-word-265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2685-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 141K Jan 22 23:49 gen-word-270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 gen-word-2735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 139K Jan 22 23:49 gen-word-275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 138K Jan 22 23:49 gen-word-280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.7M Jan 22 23:49 gen-word-2840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 142K Jan 22 23:49 gen-word-285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
2.8M Jan 22 23:49 gen-word-2885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 144K Jan 22 23:49 gen-word-290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.8M Jan 22 23:49 gen-word-2945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 146K Jan 22 23:49 gen-word-295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-2995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 152K Jan 22 23:49 gen-word-300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.9M Jan 22 23:49 gen-word-3035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 153K Jan 22 23:49 gen-word-305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 
gen-word-3080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 198K Jan 22 23:49 gen-word-30-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 159K Jan 22 23:49 gen-word-310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.0M Jan 22 23:49 gen-word-3150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 157K Jan 22 23:49 gen-word-315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 158K Jan 22 23:49 gen-word-320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.1M Jan 22 23:49 gen-word-3255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 164K Jan 22 23:49 gen-word-325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3270-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 164K Jan 22 23:49 gen-word-330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 167K Jan 22 23:49 gen-word-335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.2M Jan 22 23:49 gen-word-3365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 172K Jan 22 23:49 gen-word-340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 176K Jan 22 23:49 gen-word-345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.3M Jan 22 23:49 gen-word-3460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
3.4M Jan 22 23:49 gen-word-3470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 174K Jan 22 23:49 gen-word-350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 179K Jan 22 23:49 gen-word-355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.4M Jan 22 23:49 gen-word-3595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 217K Jan 22 23:49 gen-word-35-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 179K Jan 22 23:49 gen-word-360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 182K Jan 22 23:49 gen-word-365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 
gen-word-3660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.5M Jan 22 23:49 gen-word-3680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 182K Jan 22 23:49 gen-word-370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 187K Jan 22 23:49 gen-word-375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.6M Jan 22 23:49 gen-word-3780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 188K Jan 22 23:49 gen-word-380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 190K Jan 22 23:49 gen-word-385-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.7M Jan 22 23:49 gen-word-3895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 196K Jan 22 23:49 gen-word-390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 194K Jan 22 23:49 gen-word-395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.8M Jan 22 23:49 gen-word-3990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-3995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 200K Jan 22 23:49 gen-word-400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
202K Jan 22 23:49 gen-word-405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 243K Jan 22 23:49 gen-word-40-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 202K Jan 22 23:49 gen-word-410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 3.9M Jan 22 23:49 gen-word-4135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 208K Jan 22 23:49 gen-word-415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.0M Jan 22 23:49 gen-word-4205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 212K Jan 22 23:49 gen-word-420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 
gen-word-4250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 211K Jan 22 23:49 gen-word-425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 214K Jan 22 23:49 gen-word-430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.1M Jan 22 23:49 gen-word-4310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 215K Jan 22 23:49 gen-word-435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 223K Jan 22 23:49 gen-word-440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.2M Jan 22 23:49 gen-word-4420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4445-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 222K Jan 22 23:49 gen-word-445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 231K Jan 22 23:49 gen-word-450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.3M Jan 22 23:49 gen-word-4520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 227K Jan 22 23:49 gen-word-455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 268K Jan 22 23:49 gen-word-45-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 226K Jan 22 23:49 gen-word-460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.4M Jan 22 23:49 gen-word-4620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
4.5M Jan 22 23:49 gen-word-4640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 232K Jan 22 23:49 gen-word-465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 232K Jan 22 23:49 gen-word-470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.5M Jan 22 23:49 gen-word-4745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 235K Jan 22 23:49 gen-word-475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 247K Jan 22 23:49 gen-word-480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.6M Jan 22 23:49 gen-word-4830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 
gen-word-4835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 240K Jan 22 23:49 gen-word-485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 243K Jan 22 23:49 gen-word-490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.7M Jan 22 23:49 gen-word-4940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 248K Jan 22 23:49 gen-word-495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-4995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 249K Jan 22 23:49 gen-word-500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5030-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.8M Jan 22 23:49 gen-word-5045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 253K Jan 22 23:49 gen-word-505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 292K Jan 22 23:49 gen-word-50-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 252K Jan 22 23:49 gen-word-510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 4.9M Jan 22 23:49 gen-word-5150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 258K Jan 22 23:49 gen-word-515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 259K Jan 22 23:49 gen-word-520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
5.0M Jan 22 23:49 gen-word-5225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 263K Jan 22 23:49 gen-word-525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.0M Jan 22 23:49 gen-word-5265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 268K Jan 22 23:49 gen-word-530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 265K Jan 22 23:49 gen-word-535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.1M Jan 22 23:49 gen-word-5390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 268K Jan 22 23:49 gen-word-540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 
gen-word-5420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 272K Jan 22 23:49 gen-word-545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.2M Jan 22 23:49 gen-word-5485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 271K Jan 22 23:49 gen-word-550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 279K Jan 22 23:49 gen-word-555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.3M Jan 22 23:49 gen-word-5575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 314K Jan 22 23:49 gen-word-55-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 276K Jan 22 23:49 gen-word-560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5610-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 280K Jan 22 23:49 gen-word-565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.4M Jan 22 23:49 gen-word-5685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 287K Jan 22 23:49 gen-word-570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 283K Jan 22 23:49 gen-word-575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 294K Jan 22 23:49 gen-word-580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
5.6M Jan 22 23:49 gen-word-5810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.5M Jan 22 23:49 gen-word-5825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 288K Jan 22 23:49 gen-word-585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.6M Jan 22 23:49 gen-word-5885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 297K Jan 22 23:49 gen-word-590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 297K Jan 22 23:49 gen-word-595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.8M Jan 22 23:49 gen-word-5975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.8M Jan 22 23:49 gen-word-5980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.7M Jan 22 23:49 gen-word-5990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.8M Jan 22 23:49 gen-word-5995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 74K Jan 22 23:49 gen-word-5-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 5.8M Jan 22 23:49 gen-word-6000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 298K Jan 22 23:49 
gen-word-600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 298K Jan 22 23:49 gen-word-605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 343K Jan 22 23:49 gen-word-60-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 307K Jan 22 23:49 gen-word-610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 301K Jan 22 23:49 gen-word-615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 301K Jan 22 23:49 gen-word-620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 306K Jan 22 23:49 gen-word-625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 314K Jan 22 23:49 gen-word-630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 321K Jan 22 23:49 gen-word-635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 314K Jan 22 23:49 gen-word-640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 311K Jan 22 23:49 gen-word-645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 329K Jan 22 23:49 gen-word-650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 323K Jan 22 23:49 gen-word-655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 361K Jan 22 23:49 gen-word-65-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 328K Jan 22 23:49 gen-word-660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 324K Jan 22 23:49 gen-word-665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 326K Jan 22 23:49 gen-word-670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 334K Jan 22 23:49 gen-word-675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 337K Jan 22 23:49 gen-word-680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 339K Jan 22 23:49 gen-word-685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 341K Jan 22 23:49 gen-word-690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 341K Jan 22 23:49 gen-word-695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 349K Jan 22 23:49 gen-word-700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 342K Jan 22 23:49 gen-word-705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 389K Jan 22 23:49 gen-word-70-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 355K Jan 22 23:49 gen-word-710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 348K Jan 22 23:49 gen-word-715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 357K Jan 22 23:49 gen-word-720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 360K Jan 22 23:49 gen-word-725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 360K Jan 22 23:49 gen-word-730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 362K Jan 22 23:49 gen-word-735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 360K Jan 22 23:49 gen-word-740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 366K Jan 22 23:49 gen-word-745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 374K Jan 22 23:49 gen-word-750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 374K Jan 22 23:49 gen-word-755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 407K Jan 22 23:49 gen-word-75-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 375K Jan 22 23:49 gen-word-760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 376K Jan 22 23:49 gen-word-765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 370K Jan 22 23:49 gen-word-770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 378K Jan 22 23:49 gen-word-775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 389K Jan 22 23:49 gen-word-780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 391K Jan 22 23:49 gen-word-785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 389K Jan 22 23:49 gen-word-790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 397K Jan 22 23:49 gen-word-795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 394K Jan 22 
23:49 gen-word-800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 397K Jan 22 23:49 gen-word-805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 434K Jan 22 23:49 gen-word-80-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 403K Jan 22 23:49 gen-word-810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 399K Jan 22 23:49 gen-word-815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 407K Jan 22 23:49 gen-word-820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 407K Jan 22 23:49 gen-word-825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 410K Jan 22 23:49 gen-word-830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 416K Jan 22 23:49 gen-word-835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 414K Jan 22 23:49 gen-word-840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 414K Jan 22 23:49 gen-word-845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 420K Jan 22 23:49 gen-word-850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 418K Jan 22 23:49 gen-word-855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 457K Jan 22 23:49 gen-word-85-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 421K Jan 22 23:49 gen-word-860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 422K Jan 22 23:49 gen-word-865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 430K Jan 22 23:49 gen-word-870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 428K Jan 22 23:49 gen-word-875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 437K Jan 22 23:49 gen-word-880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 435K Jan 22 23:49 gen-word-885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 439K Jan 22 23:49 gen-word-890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 443K Jan 22 23:49 gen-word-895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 442K Jan 22 23:49 gen-word-900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 448K Jan 22 23:49 gen-word-905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 498K Jan 22 23:49 gen-word-90-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 447K Jan 22 23:49 gen-word-910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 456K Jan 22 23:49 gen-word-915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 447K Jan 22 23:49 gen-word-920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 456K Jan 22 23:49 gen-word-925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 462K Jan 22 23:49 gen-word-930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 457K Jan 22 23:49 gen-word-935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 462K Jan 22 23:49 gen-word-940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 462K Jan 22 23:49 gen-word-945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 468K Jan 22 23:49 gen-word-950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 465K Jan 22 23:49 gen-word-955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 500K Jan 22 23:49 gen-word-95-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 466K Jan 22 23:49 gen-word-960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 477K Jan 22 23:49 gen-word-965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 472K Jan 22 23:49 gen-word-970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 481K Jan 22 23:49 gen-word-975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 483K Jan 22 23:49 gen-word-980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 477K Jan 22 23:49 gen-word-985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 489K Jan 22 23:49 gen-word-990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 483K Jan 22 23:49 gen-word-995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M 
Jan 22 23:49 shuffle-word-1000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 14M Jan 22 23:49 shuffle-word-100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 26M Jan 22 23:49 shuffle-word-10-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 
shuffle-word-1180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 
shuffle-word-1365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-1500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 
shuffle-word-1555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 22M Jan 22 23:49 shuffle-word-15-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1735-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
2.6M Jan 22 23:49 shuffle-word-1925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-1995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 19M Jan 22 23:49 shuffle-word-20-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 
shuffle-word-210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 
shuffle-word-2295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 
shuffle-word-2480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 18M Jan 22 23:49 shuffle-word-25-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.6M Jan 22 23:49 shuffle-word-2600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2660-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
2.5M Jan 22 23:49 shuffle-word-2855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-2995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.4M Jan 22 23:49 shuffle-word-300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-3040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 17M Jan 22 23:49 shuffle-word-30-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3220-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
2.5M Jan 22 23:49 shuffle-word-3410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 17M Jan 22 23:49 
shuffle-word-35-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3780-count.jsonl\n", 
- "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3965-count.jsonl\n", - "-rw-rw-r-- 1 recursal 
recursal 2.5M Jan 22 23:49 shuffle-word-3970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-3995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 16M Jan 22 23:49 shuffle-word-40-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-4155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4265-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-4340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-4525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 16M Jan 22 23:49 shuffle-word-45-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-470-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
2.5M Jan 22 23:49 shuffle-word-4900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-4995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5005-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5010-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5015-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5020-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5025-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5030-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5035-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5040-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5045-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5050-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5055-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5060-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5065-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5070-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5075-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5080-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-5085-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5090-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5095-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-50-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5100-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5105-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5110-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5115-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5120-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5125-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5130-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5135-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5140-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5145-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5150-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5155-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5160-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5165-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5170-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5175-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5180-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5185-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5190-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5195-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5200-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5205-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5210-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5215-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5220-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5225-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5230-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5235-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5240-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5245-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5250-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5255-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5260-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5265-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5270-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5275-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5280-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5285-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5290-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5295-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5300-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5305-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5310-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5315-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5320-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5325-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5330-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5335-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5340-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5345-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5350-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5355-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5360-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5365-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5370-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5375-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5380-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5385-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5390-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5395-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5400-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5405-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5410-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5415-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5420-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5425-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5430-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5435-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5440-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5445-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5450-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5455-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 
1.3M Jan 22 23:49 shuffle-word-545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5460-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5465-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5470-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5475-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5480-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5485-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5490-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5495-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5500-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5505-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5510-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5515-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5520-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5525-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5530-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5535-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5540-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5545-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5550-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-555-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-55-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5605-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-560-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-5640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-565-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-570-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-575-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-580-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 
shuffle-word-5825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-585-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-590-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-595-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-5995-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 40M Jan 22 23:49 shuffle-word-5-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 2.5M Jan 22 23:49 shuffle-word-6000-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-600-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-605-count.jsonl\n", - 
"-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-60-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-610-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-615-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-620-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-625-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-630-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-635-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-640-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-645-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-650-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-655-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-65-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-660-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-665-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-670-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-675-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-680-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-685-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-690-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-695-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-700-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-705-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-70-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-710-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-715-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-720-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-725-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-730-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-735-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-740-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-745-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-750-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-755-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-75-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-760-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-765-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-770-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-775-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-780-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-785-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-790-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 
shuffle-word-795-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-800-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-805-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-80-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-810-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-815-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-820-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-825-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-830-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-835-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-840-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-845-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-850-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-855-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 15M Jan 22 23:49 shuffle-word-85-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-860-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-865-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-870-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-875-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-880-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-885-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-890-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-895-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-900-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-905-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 14M Jan 22 23:49 shuffle-word-90-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-910-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-915-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-920-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-925-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-930-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-935-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-940-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-945-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-950-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-955-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 14M Jan 22 23:49 shuffle-word-95-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-960-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-965-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-970-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-975-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-980-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 
22 23:49 shuffle-word-985-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-990-count.jsonl\n", - "-rw-rw-r-- 1 recursal recursal 1.3M Jan 22 23:49 shuffle-word-995-count.jsonl\n" + "total 1.8G\n", + "drwxr-xr-x 2 root root 40K Jan 23 18:27 .\n", + "drwxr-xr-x 5 root root 4.0K Jan 23 13:12 ..\n", + "-rw-r--r-- 1 root root 20K Jan 23 18:27 gen-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 105K Jan 23 18:27 gen-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 739K Jan 23 18:27 gen-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 753K Jan 23 18:27 gen-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 754K Jan 23 18:27 gen-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 749K Jan 23 18:27 gen-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 765K Jan 23 18:27 gen-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 770K Jan 23 18:27 gen-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 784K Jan 23 18:27 gen-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 779K Jan 23 18:27 gen-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 791K Jan 23 18:27 gen-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 805K Jan 23 18:27 gen-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 90K Jan 23 18:27 gen-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 812K Jan 23 18:27 gen-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 823K Jan 23 18:27 gen-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 832K Jan 23 18:27 gen-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 835K Jan 23 18:27 gen-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 840K Jan 23 18:27 gen-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 848K Jan 23 18:27 gen-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 855K Jan 23 18:27 gen-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 861K Jan 23 18:27 gen-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 872K Jan 23 18:27 gen-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 879K Jan 23 18:27 gen-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 92K Jan 23 18:27 gen-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 892K Jan 23 18:27 gen-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 899K Jan 23 18:27 gen-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 886K Jan 23 18:27 gen-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 898K Jan 23 18:27 gen-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 911K Jan 23 18:27 gen-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 914K Jan 23 18:27 gen-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 925K Jan 23 18:27 gen-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 937K Jan 23 18:27 gen-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 949K Jan 23 18:27 gen-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 955K Jan 23 18:27 gen-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 103K Jan 23 18:27 gen-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 953K Jan 23 18:27 gen-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 953K Jan 23 18:27 gen-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 965K Jan 23 18:27 gen-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 979K Jan 23 18:27 gen-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 989K Jan 23 18:27 gen-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 986K Jan 23 18:27 gen-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 990K Jan 23 18:27 gen-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 1017K Jan 23 18:27 gen-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1000K Jan 23 18:27 
gen-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 111K Jan 23 18:27 gen-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 25K Jan 23 18:27 gen-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 115K Jan 23 18:27 gen-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 123K Jan 23 18:27 gen-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 133K Jan 23 18:27 gen-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 142K Jan 23 18:27 gen-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 
gen-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 147K Jan 23 18:27 gen-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 30K Jan 23 18:27 gen-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 152K Jan 23 18:27 gen-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 160K Jan 23 18:27 gen-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 166K Jan 23 18:27 gen-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 
gen-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 177K Jan 23 18:27 gen-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 181K Jan 23 18:27 gen-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 34K Jan 23 18:27 gen-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 193K Jan 23 18:27 gen-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 204K Jan 23 18:27 gen-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 
gen-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 201K Jan 23 18:27 gen-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 214K Jan 23 18:27 gen-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 220K Jan 23 18:27 gen-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 40K Jan 23 18:27 gen-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 226K Jan 23 18:27 gen-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 gen-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 gen-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 gen-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 234K Jan 23 18:27 gen-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 gen-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 gen-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 238K Jan 23 18:27 
gen-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 247K Jan 23 18:27 gen-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 18:27 gen-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 18:27 gen-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 18:27 gen-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 18:27 gen-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 256K Jan 23 18:27 gen-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 18:27 gen-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 18:27 gen-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 18:27 gen-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 18:27 gen-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 44K Jan 23 18:27 gen-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 262K Jan 23 18:27 gen-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 18:27 gen-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 18:27 gen-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 269K Jan 23 18:27 gen-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 277K Jan 23 18:27 gen-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 18:27 gen-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 18:27 gen-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 18:27 gen-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 283K Jan 23 18:27 gen-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 292K Jan 23 18:27 gen-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 18:27 gen-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 18:27 gen-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 18:27 gen-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 48K Jan 23 18:27 gen-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 297K Jan 23 18:27 gen-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 18:27 gen-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 18:27 gen-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 18:27 gen-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 18:27 gen-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 313K Jan 23 18:27 gen-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 
gen-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 315K Jan 23 18:27 gen-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 18:27 gen-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 18:27 gen-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 324K Jan 23 18:27 gen-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 18:27 gen-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 18:27 gen-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 18:27 gen-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 18:27 gen-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 324K Jan 23 18:27 gen-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 18:27 gen-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 18:27 gen-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 18:27 gen-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 18:27 gen-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 54K Jan 23 18:27 gen-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 340K Jan 23 18:27 gen-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 18:27 gen-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 348K Jan 23 18:27 gen-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 18:27 gen-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 18:27 gen-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 346K Jan 23 18:27 gen-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 18:27 gen-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 18:27 gen-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 18:27 gen-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 18:27 gen-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 352K Jan 23 18:27 gen-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 18:27 gen-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 18:27 gen-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 369K Jan 23 18:27 gen-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 18:27 gen-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 18:27 gen-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 57K Jan 23 18:27 gen-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 375K Jan 23 18:27 gen-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 18:27 
gen-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 18:27 gen-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 18:27 gen-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 18:27 gen-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 378K Jan 23 18:27 gen-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 18:27 gen-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 18:27 gen-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 18:27 gen-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 gen-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 388K Jan 23 18:27 gen-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 gen-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 gen-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 gen-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 gen-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 394K Jan 23 18:27 gen-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 18:27 gen-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 18:27 gen-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 18:27 gen-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 18:27 gen-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 404K Jan 23 18:27 gen-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 18:27 gen-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 18:27 gen-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 18:27 gen-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 18:27 gen-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 62K Jan 23 18:27 gen-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 404K Jan 23 18:27 gen-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 18:27 gen-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 18:27 gen-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 18:27 gen-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 18:27 gen-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 415K Jan 23 18:27 gen-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 18:27 gen-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 18:27 gen-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 18:27 gen-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 18:27 gen-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 425K Jan 23 18:27 gen-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 18:27 gen-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 18:27 gen-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 18:27 gen-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 433K Jan 23 18:27 gen-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 438K Jan 23 18:27 gen-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 gen-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 gen-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 
gen-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 gen-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 69K Jan 23 18:27 gen-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 447K Jan 23 18:27 gen-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 gen-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 448K Jan 23 18:27 gen-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 456K Jan 23 18:27 gen-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 471K Jan 23 18:27 gen-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 468K Jan 23 18:27 gen-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 73K Jan 23 18:27 gen-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 489K Jan 23 18:27 gen-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 481K Jan 23 18:27 gen-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 496K Jan 23 18:27 gen-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 511K Jan 23 18:27 gen-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 510K Jan 23 18:27 gen-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 77K Jan 23 18:27 gen-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 523K Jan 23 18:27 gen-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 531K Jan 23 18:27 gen-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 534K Jan 23 18:27 gen-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 544K Jan 23 18:27 gen-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 546K Jan 23 18:27 gen-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 83K Jan 23 18:27 gen-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 548K Jan 23 18:27 gen-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 564K Jan 23 18:27 gen-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 576K Jan 23 18:27 gen-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 582K Jan 23 18:27 gen-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 588K Jan 23 18:27 gen-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 87K Jan 23 18:27 gen-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 589K Jan 23 18:27 gen-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 598K Jan 23 18:27 gen-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 608K Jan 23 18:27 gen-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 616K Jan 23 18:27 gen-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 626K Jan 23 18:27 gen-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 94K Jan 23 18:27 gen-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 623K Jan 23 18:27 gen-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 640K Jan 23 18:27 gen-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 637K Jan 23 18:27 gen-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 651K Jan 23 18:27 gen-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 660K Jan 23 18:27 gen-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 96K Jan 23 18:27 gen-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 663K Jan 23 18:27 gen-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 673K Jan 23 18:27 gen-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 675K Jan 23 18:27 gen-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 688K Jan 23 18:27 gen-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 693K Jan 23 18:27 gen-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 103K Jan 23 18:27 gen-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 700K Jan 23 18:27 gen-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 713K Jan 23 18:27 gen-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 723K Jan 23 18:27 gen-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 726K 
Jan 23 18:27 gen-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 727K Jan 23 18:27 gen-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 shuffle-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 18:27 shuffle-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 shuffle-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 shuffle-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 shuffle-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1380-count.jsonl\n", + "-rw-r--r-- 
1 root root 2.0M Jan 23 18:27 shuffle-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 18:27 shuffle-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 shuffle-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 
shuffle-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 18:27 shuffle-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2190-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.0M Jan 23 18:27 shuffle-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 shuffle-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 
shuffle-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 18:27 shuffle-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-300-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.5M Jan 23 18:27 shuffle-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 18:27 shuffle-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 
shuffle-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 18:27 shuffle-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 shuffle-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root 
root 2.5M Jan 23 18:27 shuffle-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 8.0M Jan 23 18:27 shuffle-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 shuffle-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 shuffle-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5600-count.jsonl\n", + 
"-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 shuffle-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-810-count.jsonl\n", + 
"-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 18:27 shuffle-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 18:27 shuffle-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 12K Jan 23 18:27 word-2-count.jsonl\n", + "-rw-r--r-- 1 root root 14K Jan 23 18:27 word-4-count.jsonl\n" ] } ], @@ -6901,29 +2249,30 @@ "echo \"## Generating word reptition dataset ##\"\n", "\n", "#\n", - "# Training set for < 100 words\n", - "# We bump this aggressively, as its used to fill in packing\n", + "# Training set for <= 100 words\n", + "# This is used to fill up as much blanks as possible\n", "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", "for i in {5..100..5} \n", "do\n", - " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 500 & \n", - " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 500 & \n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", "done\n", "\n", "#\n", - "# Ramping up the 50+ - 1500 words dataset\n", - "# This is to ensure there is ramp from the previous models\n", + "# Ramping up the 100+ - 3000 words dataset\n", "# \n", - "for i in {105..1500..5} \n", + "for i in {110..3000..10} \n", "do\n", - " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 50 & \n", - " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 50 & \n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 75 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 75 & \n", "done\n", "\n", "#\n", - "# Ramping up the 
1500+ - 6000 words dataset\n", + "# Ramping up the 3000+ - 6000 words dataset\n", + "# \n", - "for i in {1505..6000..5} \n", + "for i in {3000..6000..25} \n", "do\n", " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", "done\n", @@ -6937,27 +2286,33 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, + "id": "fa2c8f0f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resolving data files: 100%|█████████████| 2400/2400 [00:00<00:00, 365795.62it/s]\n", - "Generating train split: 1258813 examples [00:16, 78115.78 examples/s] \n", - "Map (num_proc=160): 100%|███| 1258813/1258813 [01:30<00:00, 13918.46 examples/s]\n", - "Filter (num_proc=160): 100%|█| 1258813/1258813 [00:49<00:00, 25674.24 examples/s\n", - "Map (num_proc=160): 100%|██| 1238639/1238639 [00:06<00:00, 189798.96 examples/s]\n", - "Map (num_proc=160): 100%|███| 1238639/1238639 [01:09<00:00, 17869.03 examples/s]\n", - "Map (num_proc=160): 100%|██████| 127252/127252 [00:15<00:00, 8260.92 examples/s]\n", - "Saving the dataset (11/11 shards): 100%|█| 127252/127252 [00:05<00:00, 21357.11 \n", - "Saving the dataset (1/1 shards): 100%|█| 6225/6225 [00:00<00:00, 62482.00 exampl\n" + "Resolving data files: 100%|███████████████| 862/862 [00:00<00:00, 141224.56it/s]\n", + "Generating train split: 312299 examples [00:08, 38514.98 examples/s] \n", + "Map (num_proc=96): 100%|███████| 312299/312299 [01:05<00:00, 4748.29 examples/s]\n", + "Filter (num_proc=96): 100%|███| 312299/312299 [00:28<00:00, 11020.85 examples/s]\n", + "Map (num_proc=96): 100%|██████| 307944/307944 [00:06<00:00, 50786.71 examples/s]\n", + "Map (num_proc=96): 100%|███████| 307944/307944 [00:41<00:00, 7509.42 examples/s]\n", + "Map (num_proc=96): 100%|█████████| 36875/36875 [00:13<00:00, 2686.82 examples/s]\n", + "Saving the dataset (4/4 shards): 100%|█| 36875/36875 [00:01<00:00, 23914.80 exam\n", + "Saving the dataset (1/1 shards): 100%|█| 1548/1548 [00:00<00:00, 35414.86 exampl\n" ] } ], "source": [ "# Let's pre-tokenize the required dataset\n", + "# and pack the data into lengths of 8k\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# Then to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n", + "#\n", "!cd \"{TRAINER_DIR}\" && \\\n", " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n", "\n", "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\"" ] }, + { "cell_type": "markdown", + "id": "60244472", + "metadata": {}, + "source": [ + "## Finetune 1 (0 -> 2*2k) : The actual tune!" + ] + }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, + "id": "4025b7b8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[2024-01-23 00:03:23,061] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. 
To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8x4090] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=2048, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=2048'], args=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8x4090] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=2048, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=2048'].\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 3160641161\n", - "Seed set to 3160641161\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "[2024-01-23 18:30:56,182] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. 
To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=4', '--model.ctx_len=8192'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=4', '--model.ctx_len=8192'].\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 3725758400\n", + "Seed set to 3725758400\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", "Building extension module wkv5...\n", "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "ninja: no work to do.\n", "Loading extension module wkv5...\n", "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", "---\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. 
Please set your precision to bf16-mixed instead!\n", "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "IPU available: False, using: 0 IPUs\n", @@ -7003,86 +2367,67 @@ " - target_batch_size: 256\n", " - num_nodes: 1\n", " - num_devices: 8\n", - " - microbatch_size: 8\n", - " - accumulate_grad_batches: 4\n", + " - microbatch_size: 4\n", + " - accumulate_grad_batches: 8\n", " - effective_batch_size: 256\n", "\n", - "[rank: 0] Seed set to 3160641161\n", + "[rank: 0] Seed set to 3725758400\n", "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", - "[2024-01-23 00:03:40,928] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-23 00:03:41,167] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-23 00:03:41,174] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-23 00:03:41,174] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-23 00:03:41,177] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-23 00:03:41,194] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[2024-01-23 00:03:41,253] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'\n", - "[rank: 7] Seed set to 3160641161\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "[2024-01-23 18:31:17,933] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,934] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,934] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,936] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,936] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,937] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,937] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 2] Seed set to 3725758400\n", + "[rank: 3] Seed set to 3725758400\n", + "[rank: 5] Seed set to 3725758400\n", + "[rank: 4] Seed set to 3725758400\n", + "[rank: 1] Seed set to 3725758400\n", + "[rank: 7] Seed set to 3725758400\n", + "[rank: 6] Seed set to 3725758400\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "[rank: 2] Seed set to 3160641161\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "[rank: 4] Seed set to 3160641161\n", - "[rank: 5] Seed set to 3160641161\n", - "[rank: 1] Seed set to 3160641161\n", - "[rank: 3] Seed set to 3160641161\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", - "[rank: 6] Seed set to 3160641161\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", " return self.fget.__get__(instance, owner)()\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", - "Building extension module wkv5...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "ninja: no work to do.\n", - "Loading extension module wkv5...\n", - "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", - "---\n", - "---\n", - "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "---\n", "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Building extension module wkv5...\n", "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", "ninja: no work to do.\n", @@ -7096,36 +2441,53 @@ "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", "---\n", "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", "Loading extension module wkv5...\n", "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", "---\n", - "[rank: 7] Seed set to 3160641161\n", + "[rank: 7] Seed set to 3725758400\n", + "[rank: 6] Seed set to 3725758400\n", "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", - "[rank: 4] Seed set to 3160641161\n", - "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", - "[rank: 2] Seed set to 3160641161\n", - "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", - "[rank: 6] Seed set to 3160641161\n", "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", - "[rank: 3] Seed set to 3160641161\n", - "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", - "[rank: 1] Seed set to 3160641161\n", + "[rank: 1] Seed set to 3725758400\n", "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", - "[rank: 5] Seed set to 3160641161\n", + "[rank: 3] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 4] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 2] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 5] Seed set to 3725758400\n", "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240123_000416-1quitpef\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240123_183155-ss3js48a\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8x4090] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=2048, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8xA100] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/1quitpef\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/ss3js48a\u001b[0m\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", "#\n", "# RWKV lighting_trainer.py important notes \n", "# https://github.com/RWKV/RWKV-infctx-trainer \n", @@ -7133,62 +2495,59 @@ "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", "# - When resuming from checkpoint, the estimated time is inaccurate\n", - "#\n", - "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", "\n", "[RWKV.model] Configuring optimizer with\n", - " - lr_init: 4.000e-04 (0.0004)\n", - " - lr_final: 3.000e-04 (0.0003)\n", - "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + " - lr_init: 2.000e-04 (0.0002)\n", + " - lr_final: 1.000e-04 (0.0001)\n", "\n", - "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", - "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "LOCAL_RANK: 1 - 
CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", "Detected CUDA files, patching ldflags\n", - "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", "Building extension module fused_adam...\n", "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "ninja: no work to do.\n", "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.06246376037597656 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.013622045516967773 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Loading `train_dataloader` to estimate number of stepping batches.\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...Loading extension module fused_adam...\n", + "\n", "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.10220813751220703 seconds\n", + "Loading extension module fused_adam...Loading extension module fused_adam...\n", + "\n", "Loading extension module fused_adam...\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.101654052734375 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Loading extension module fused_adam...\n", - "Loading extension module fused_adam...\n", - "Time to load fused_adam op: 0.10210061073303223 seconds\n", - "Time to load fused_adam op: 0.1024923324584961 seconds\n", - "Time to load fused_adam op: 0.10265374183654785 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.10231280326843262 seconds\n", + "Time to load fused_adam op: 0.10230112075805664 secondsTime to load fused_adam op: 0.10238885879516602 seconds\n", + "\n", + "Time to load fused_adam op: 0.10254812240600586 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Time to load fused_adam op: 0.10338902473449707 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.10258603096008301 seconds\n", - "Time to load fused_adam op: 0.10260725021362305 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", - "Time to load fused_adam op: 0.1026144027709961 seconds\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10484576225280762 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", "\n", " | Name | Type | Params\n", @@ -7202,28 +2561,259 @@ "0 Non-trainable params\n", "1.6 B Total params\n", "6,311.018 Total estimated model params size (MB)\n", - "Epoch 0: 5%| | 100/1989 [06:48<2:08:42, 0.24it/s, v_num=tpef, train/loss=4.56/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + "Epoch 0: 17%|▏| 200/1153 [12:26<59:16, 0.27it/s, v_num=s48a, train/loss=0.013]/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", " warnings.warn(\n", - "Epoch 0: 62%|▌| 1243/1989 [1:22:56<49:46, 0.25it/s, v_num=tpef, train/loss=7.7^C\n" + "Epoch 0: 100%|█| 1153/1153 [1:04:42<00:00, 0.30it/s, v_num=s48a, train/loss=2.4\n", + "Validation: | | 0/? [00:00 8k) : The actual tune!\n", "# Start the finetune model training\n", "!cd \"{TRAINER_DIR}\" && \\\n", " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", @@ -7231,17 +2821,289 @@ " -c \"{NOTEBOOK_DIR}/stage-2-tune.yaml\" \\\n", " --model.load_model=\"../model/Memory-Tune-Stage-1-{MODEL_NAME}\" \\\n", " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/\" \\\n", - " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-2 (bs=256, train-ctx=2048, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-2 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", " --trainer.devices=\"{GPU_DEVICES}\" \\\n", - " --trainer.microbatch_size=8 \\\n", - " --model.ctx_len=2048" + " --trainer.microbatch_size=4 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bab43e37", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-23 19:38:22,745] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "Processing zero checkpoint '../checkpoint/stage-2-memory-finetune/RWKV-v5-1B5-world.pth/last.ckpt/checkpoint'\n", + "Detected checkpoint of type zero stage 1, world_size: 8\n", + "Parsing checkpoint created by deepspeed==0.12.6\n", + "Reconstructed fp32 state dict with 534 params 1577754624 elements\n", + "Saving bf16 state dict to ../model/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth\n", + "-rw-r--r-- 1 root root 3.0G Jan 23 19:38 ../model/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "53546121", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SCRIPT_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/memory_script\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n", + "MODEL_CODE_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "[2024-01-23 19:38:44,924] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/workspace/RWKV-infctx-trainer/RWKV-v5/src/model.py:1390: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " batch_tokens = torch.tensor(\n", + "###\n", + "### Model validation start ###\n", + "###\n", + "## Model validation for 5 tokens : 100.0% similarity, with 5 matched token, and 0 token mismatch\n", + "## Model validation for 10 tokens : 100.0% similarity, with 10 matched token, and 0 token mismatch\n", + "## Model validation for 15 tokens : 100.0% similarity, with 15 matched token, and 0 token mismatch\n", + "## Model validation for 20 tokens : 100.0% similarity, with 20 matched token, and 0 token mismatch\n", + "## Model validation for 25 tokens : 100.0% similarity, with 25 matched token, and 0 token mismatch\n", + "## Model validation for 30 tokens : 100.0% similarity, with 30 matched token, and 0 token mismatch\n", + "## Model validation for 35 tokens : 100.0% similarity, with 35 matched token, and 0 token mismatch\n", + "## Model validation for 40 tokens : 100.0% similarity, with 40 matched token, and 0 token mismatch\n", + "## Model validation for 45 tokens : 100.0% similarity, with 45 matched token, and 0 token mismatch\n", + "## Model validation for 50 tokens : 100.0% similarity, with 50 matched token, and 0 token mismatch\n", + "## Model validation for 55 tokens : 100.0% similarity, with 55 matched token, and 0 token mismatch\n", + "## Model validation for 60 tokens : 100.0% similarity, with 60 matched token, and 0 token mismatch\n", + "## Model validation for 65 tokens : 100.0% similarity, with 65 matched token, and 0 token mismatch\n", + "## Model validation for 70 tokens : 100.0% similarity, with 70 matched token, and 0 token mismatch\n", + "## Model validation for 75 tokens : 100.0% similarity, with 75 matched token, and 0 token mismatch\n", + "## Model validation for 80 tokens : 100.0% similarity, with 80 matched token, and 0 token mismatch\n", + "## Model validation for 85 tokens : 100.0% similarity, with 85 matched token, and 0 token mismatch\n", + "## Model validation for 90 tokens : 100.0% similarity, with 90 matched token, and 0 token mismatch\n", + "## Model validation for 95 tokens : 100.0% similarity, with 95 matched token, and 0 token mismatch\n", + "## Model validation for 100 tokens : 100.0% similarity, with 100 matched token, and 0 token mismatch\n", + "## Model validation for 105 tokens : 100.0% similarity, with 105 matched token, and 0 token mismatch\n", + "## Model validation for 110 tokens : 100.0% similarity, with 110 matched token, and 0 token mismatch\n", + "## Model validation for 115 tokens : 100.0% similarity, with 115 matched token, and 0 token mismatch\n", + 
"## Model validation for 120 tokens : 100.0% similarity, with 120 matched token, and 0 token mismatch\n", + "## Model validation for 125 tokens : 100.0% similarity, with 125 matched token, and 0 token mismatch\n", + "## Model validation for 130 tokens : 100.0% similarity, with 130 matched token, and 0 token mismatch\n", + "## Model validation for 135 tokens : 100.0% similarity, with 135 matched token, and 0 token mismatch\n", + "## Model validation for 140 tokens : 100.0% similarity, with 140 matched token, and 0 token mismatch\n", + "## Model validation for 145 tokens : 100.0% similarity, with 145 matched token, and 0 token mismatch\n", + "## Model validation for 150 tokens : 100.0% similarity, with 150 matched token, and 0 token mismatch\n", + "## Model validation for 160 tokens : 100.0% similarity, with 160 matched token, and 0 token mismatch\n", + "## Model validation for 170 tokens : 100.0% similarity, with 170 matched token, and 0 token mismatch\n", + "## Model validation for 180 tokens : 100.0% similarity, with 180 matched token, and 0 token mismatch\n", + "## Model validation for 190 tokens : 100.0% similarity, with 190 matched token, and 0 token mismatch\n", + "## Model validation for 200 tokens : 100.0% similarity, with 200 matched token, and 0 token mismatch\n", + "## Model validation for 210 tokens : 100.0% similarity, with 210 matched token, and 0 token mismatch\n", + "## Model validation for 220 tokens : 100.0% similarity, with 220 matched token, and 0 token mismatch\n", + "## Model validation for 230 tokens : 100.0% similarity, with 230 matched token, and 0 token mismatch\n", + "## Model validation for 240 tokens : 100.0% similarity, with 240 matched token, and 0 token mismatch\n", + "## Model validation for 250 tokens : 100.0% similarity, with 250 matched token, and 0 token mismatch\n", + "## Model validation for 260 tokens : 100.0% similarity, with 260 matched token, and 0 token mismatch\n", + "## Model validation for 270 tokens : 100.0% similarity, with 270 matched token, and 0 token mismatch\n", + "## Model validation for 280 tokens : 100.0% similarity, with 280 matched token, and 0 token mismatch\n", + "## Model validation for 290 tokens : 100.0% similarity, with 290 matched token, and 0 token mismatch\n", + "## Model validation for 300 tokens : 100.0% similarity, with 300 matched token, and 0 token mismatch\n", + "## Model validation for 325 tokens : 100.0% similarity, with 325 matched token, and 0 token mismatch\n", + "## Model validation for 350 tokens : 100.0% similarity, with 350 matched token, and 0 token mismatch\n", + "## Model validation for 375 tokens : 100.0% similarity, with 375 matched token, and 0 token mismatch\n", + "## Model validation for 400 tokens : 100.0% similarity, with 400 matched token, and 0 token mismatch\n", + "## Model validation for 425 tokens : 100.0% similarity, with 425 matched token, and 0 token mismatch\n", + "## Model validation for 450 tokens : 100.0% similarity, with 450 matched token, and 0 token mismatch\n", + "## Model validation for 475 tokens : 100.0% similarity, with 475 matched token, and 0 token mismatch\n", + "## Model validation for 500 tokens : 99.8% similarity, with 499 matched token, and 1 token mismatch\n", + "## Model validation for 525 tokens : 100.0% similarity, with 525 matched token, and 0 token mismatch\n", + "## Model validation for 550 tokens : 99.81818181818181% similarity, with 549 matched token, and 1 token mismatch\n", + "## Model validation for 575 tokens : 99.82608695652175% similarity, with 574 matched 
token, and 1 token mismatch\n", + "## Model validation for 600 tokens : 99.5% similarity, with 597 matched token, and 3 token mismatch\n", + "## Model validation for 625 tokens : 99.52% similarity, with 622 matched token, and 3 token mismatch\n", + "## Model validation for 650 tokens : 99.6923076923077% similarity, with 648 matched token, and 2 token mismatch\n", + "## Model validation for 675 tokens : 99.55555555555556% similarity, with 672 matched token, and 3 token mismatch\n", + "## Model validation for 700 tokens : 99.57142857142857% similarity, with 697 matched token, and 3 token mismatch\n", + "## Model validation for 750 tokens : 99.2% similarity, with 744 matched token, and 6 token mismatch\n", + "## Model validation for 800 tokens : 98.875% similarity, with 791 matched token, and 9 token mismatch\n", + "## Model validation for 850 tokens : 98.70588235294117% similarity, with 839 matched token, and 11 token mismatch\n", + "## Model validation for 900 tokens : 98.33333333333333% similarity, with 885 matched token, and 15 token mismatch\n", + "## Model validation for 950 tokens : 98.3157894736842% similarity, with 934 matched token, and 16 token mismatch\n", + "## Model validation for 1000 tokens : 98.4% similarity, with 984 matched token, and 16 token mismatch\n", + "###\n", + "### Model validation end ###\n", + "###\n", + "SCRIPT_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/memory_script\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n", + "MODEL_CODE_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "[2024-01-23 19:40:53,200] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/workspace/RWKV-infctx-trainer/RWKV-v5/src/model.py:1390: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " batch_tokens = torch.tensor(\n", + "###\n", + "### Model validation start ###\n", + "###\n", + "## Model validation for 1000 tokens : 98.4% similarity, with 984 matched token, and 16 token mismatch\n", + "## Model validation for 1050 tokens : 98.0% similarity, with 1029 matched token, and 21 token mismatch\n", + "## Model validation for 1100 tokens : 98.0% similarity, with 1078 matched token, and 22 token mismatch\n", + "## Model validation for 1150 tokens : 97.56521739130434% similarity, with 1122 matched token, and 28 token mismatch\n", + "## Model validation for 1200 tokens : 97.33333333333334% similarity, with 1168 matched token, and 32 token mismatch\n", + "## Model validation for 1250 tokens : 97.2% similarity, with 1215 matched token, and 35 token mismatch\n", + "## Model validation for 1300 tokens : 96.46153846153847% similarity, with 1254 matched token, and 46 token mismatch\n", + "## Model validation for 1350 tokens : 96.14814814814815% similarity, with 1298 matched token, and 52 token mismatch\n", + "## Model validation for 1400 tokens : 95.42857142857143% similarity, with 1336 matched token, and 64 token mismatch\n", + "## Model validation for 1450 tokens : 95.10344827586206% similarity, with 1379 matched token, and 71 token mismatch\n", + "## Model validation for 1500 tokens : 95.19999999999999% similarity, with 1428 matched token, and 72 token mismatch\n", + "## Model validation for 1550 tokens : 94.64516129032258% similarity, with 1467 matched token, and 83 token mismatch\n", + "## Model validation for 1600 tokens : 94.3125% similarity, with 1509 matched token, and 91 token mismatch\n", + "## Model validation for 1650 tokens : 93.87878787878789% similarity, with 1549 matched token, and 101 token mismatch\n", + "## Model validation for 1700 tokens : 93.70588235294117% similarity, with 1593 matched token, and 107 token mismatch\n", + "## Model validation for 1750 tokens : 93.08571428571429% similarity, with 1629 matched token, and 121 token mismatch\n", + "## Model validation for 1800 tokens : 92.5% similarity, with 1665 matched token, and 135 token mismatch\n", + "## Model validation for 1850 tokens : 92.10810810810811% similarity, with 1704 matched token, and 146 token mismatch\n", + "## Model validation for 1900 tokens : 91.94736842105263% similarity, with 1747 matched token, and 153 token mismatch\n", + "## Model validation for 1950 tokens : 90.76923076923077% similarity, with 1770 matched token, and 180 token mismatch\n", + "## Model validation for 2000 tokens : 90.0% similarity, with 1800 matched token, and 200 token mismatch\n", + "## Model validation for 2050 tokens : 89.21951219512195% similarity, with 1829 matched token, and 221 token mismatch\n", + "## Model validation for 2100 tokens : 87.66666666666667% similarity, with 1841 matched token, and 259 token mismatch\n", + "## Model validation for 2150 tokens : 87.76744186046511% similarity, with 1887 matched token, and 263 token mismatch\n", + "## Model validation for 2200 tokens : 86.95454545454545% similarity, with 1913 matched token, and 287 token 
mismatch\n", + "## Model validation for 2250 tokens : 86.31111111111112% similarity, with 1942 matched token, and 308 token mismatch\n", + "## Model validation for 2300 tokens : 84.69565217391303% similarity, with 1948 matched token, and 352 token mismatch\n", + "## Model validation for 2350 tokens : 83.74468085106383% similarity, with 1968 matched token, and 382 token mismatch\n", + "## Model validation for 2400 tokens : 82.83333333333334% similarity, with 1988 matched token, and 412 token mismatch\n", + "## Model validation for 2450 tokens : 81.91836734693878% similarity, with 2007 matched token, and 443 token mismatch\n", + "## Model validation for 2500 tokens : 80.52% similarity, with 2013 matched token, and 487 token mismatch\n", + "## Model validation for 2550 tokens : 80.31372549019608% similarity, with 2048 matched token, and 502 token mismatch\n", + "## Model validation for 2600 tokens : 79.42307692307692% similarity, with 2065 matched token, and 535 token mismatch\n", + "## Model validation for 2650 tokens : 78.18867924528303% similarity, with 2072 matched token, and 578 token mismatch\n", + "## Model validation for 2700 tokens : 78.33333333333333% similarity, with 2115 matched token, and 585 token mismatch\n", + "## Model validation for 2750 tokens : 76.76363636363637% similarity, with 2111 matched token, and 639 token mismatch\n", + "## Model validation for 2800 tokens : 76.14285714285714% similarity, with 2132 matched token, and 668 token mismatch\n", + "## Model validation for 2850 tokens : 74.98245614035088% similarity, with 2137 matched token, and 713 token mismatch\n", + "## Model validation for 2900 tokens : 74.10344827586208% similarity, with 2149 matched token, and 751 token mismatch\n", + "## Model validation for 2950 tokens : 72.61016949152543% similarity, with 2142 matched token, and 808 token mismatch\n", + "## Model validation for 3000 tokens : 71.39999999999999% similarity, with 2142 matched token, and 858 token mismatch\n", + "## Model validation for 3050 tokens : 70.88524590163935% similarity, with 2162 matched token, and 888 token mismatch\n", + "## Model validation for 3100 tokens : 70.03225806451613% similarity, with 2171 matched token, and 929 token mismatch\n", + "## Model validation for 3150 tokens : 69.3968253968254% similarity, with 2186 matched token, and 964 token mismatch\n", + "## Model validation for 3200 tokens : 68.4375% similarity, with 2190 matched token, and 1010 token mismatch\n", + "## Model validation for 3250 tokens : 67.13846153846154% similarity, with 2182 matched token, and 1068 token mismatch\n", + "## Model validation for 3300 tokens : 65.72727272727272% similarity, with 2169 matched token, and 1131 token mismatch\n", + "## Model validation for 3350 tokens : 64.17910447761194% similarity, with 2150 matched token, and 1200 token mismatch\n", + "## Model validation for 3400 tokens : 62.76470588235294% similarity, with 2134 matched token, and 1266 token mismatch\n", + "## Model validation for 3450 tokens : 61.56521739130435% similarity, with 2124 matched token, and 1326 token mismatch\n", + "## Model validation for 3500 tokens : 60.05714285714285% similarity, with 2102 matched token, and 1398 token mismatch\n", + "## Model validation for 3550 tokens : 59.014084507042256% similarity, with 2095 matched token, and 1455 token mismatch\n", + "## Model validation for 3600 tokens : 57.333333333333336% similarity, with 2064 matched token, and 1536 token mismatch\n", + "## Model validation for 3650 tokens : 56.9041095890411% similarity, with 2077 
matched token, and 1573 token mismatch\n", + "## Model validation for 3700 tokens : 55.7027027027027% similarity, with 2061 matched token, and 1639 token mismatch\n", + "## Model validation for 3750 tokens : 54.37333333333333% similarity, with 2039 matched token, and 1711 token mismatch\n", + "## Model validation for 3800 tokens : 52.921052631578945% similarity, with 2011 matched token, and 1789 token mismatch\n", + "## Model validation for 3850 tokens : 51.53246753246753% similarity, with 1984 matched token, and 1866 token mismatch\n", + "## Model validation for 3900 tokens : 50.10256410256411% similarity, with 1954 matched token, and 1946 token mismatch\n", + "## Model validation for 3950 tokens : 48.88607594936708% similarity, with 1931 matched token, and 2019 token mismatch\n", + "## Model validation for 4000 tokens : 48.075% similarity, with 1923 matched token, and 2077 token mismatch\n", + "###\n", + "### Model validation end ###\n", + "###\n", + "SCRIPT_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/memory_script\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n", + "MODEL_CODE_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "[2024-01-23 19:46:49,204] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/workspace/RWKV-infctx-trainer/RWKV-v5/src/model.py:1390: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " batch_tokens = torch.tensor(\n", + "###\n", + "### Model validation start ###\n", + "###\n", + "## Model validation for 4000 tokens : 48.025% similarity, with 1921 matched token, and 2079 token mismatch\n", + "## Model validation for 4050 tokens : 46.81481481481482% similarity, with 1896 matched token, and 2154 token mismatch\n", + "## Model validation for 4100 tokens : 45.31707317073171% similarity, with 1858 matched token, and 2242 token mismatch\n", + "## Model validation for 4150 tokens : 44.31325301204819% similarity, with 1839 matched token, and 2311 token mismatch\n", + "## Model validation for 4200 tokens : 43.785714285714285% similarity, with 1839 matched token, and 2361 token mismatch\n", + "## Model validation for 4250 tokens : 42.705882352941174% similarity, with 1815 matched token, and 2435 token mismatch\n", + "## Model validation for 4300 tokens : 41.837209302325576% similarity, with 1799 matched token, and 2501 token mismatch\n", + "## Model validation for 4350 tokens : 40.390804597701155% similarity, with 1757 matched token, and 2593 token mismatch\n", + "## Model validation for 4400 tokens : 39.15909090909091% similarity, with 1723 matched token, and 2677 token mismatch\n", + "## Model validation for 4450 tokens : 38.1123595505618% similarity, with 1696 matched token, and 2754 token mismatch\n", + "## Model validation for 4500 tokens : 37.2% similarity, with 1674 matched token, and 2826 token mismatch\n", + "## Model validation for 4550 tokens : 35.91208791208791% similarity, with 1634 matched token, and 2916 token mismatch\n", + "## Model validation for 4600 tokens : 35.45652173913044% similarity, with 1631 matched token, and 2969 token mismatch\n", + "## Model validation for 4650 tokens : 34.12903225806452% similarity, with 1587 matched token, and 3063 token mismatch\n", + "## Model validation for 4700 tokens : 33.38297872340426% similarity, with 1569 matched token, and 3131 token mismatch\n", + "## Model validation for 4750 tokens : 32.694736842105264% similarity, with 1553 matched token, and 3197 token mismatch\n", + "## Model validation for 4800 tokens : 31.958333333333332% similarity, with 1534 matched token, and 3266 token mismatch\n", + "## Model validation for 4850 tokens : 30.927835051546392% similarity, with 1500 matched token, and 3350 token mismatch\n", + "## Model validation for 4900 tokens : 30.20408163265306% similarity, with 1480 matched token, and 3420 token mismatch\n", + "^C\n" + ] + } + ], + "source": [ + "# Lets do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 1000 4000\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 4000 8000" ] } ], "metadata": { "kernelspec": { - "display_name": "rwkv-infctx", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -7255,9 +3117,21 @@ "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 3695.805182, + "end_time": "2024-01-23T14:14:01.740273", + "environment_variables": {}, + "exception": null, + "input_path": "./World-1B5-mem-finetune.ipynb", + "output_path": "./World-1B5-mem-finetune.output.ipynb", + "parameters": {}, + "start_time": "2024-01-23T13:12:25.935091", + "version": "2.5.0" } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml index d9928b1d..1a02fe57 100644 --- a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml @@ -84,7 +84,7 @@ trainer: filename: null # Save the top/last K checkpoints - save_top_k: 2 + save_top_k: 3 # Choose by the most recent checkpoints (time based) monitor: 'step' mode: max @@ -187,18 +187,18 @@ model: # This allows the training of extreamly large context length (eg. 100k), # without eating up too much vram by keeping the training context length # to a resonable number sutible to the current GPU setup - ctx_len: 2048 + ctx_len: 8192 # Learning rate of the training process # --- # Initia learning rate of the process - lr_init: 8e-4 + lr_init: 3e-4 # Final learning rate after the learning rate period # learning rate will stay at final value from then onwards # # NOTE: lr_final / lr_period does not work with warmup_steps # and will be ignored (or replaced) with the warmup_steps logic instead - lr_final: 4e-4 + lr_final: 2e-4 # Number of epoch to reduce the learning rate from lr_init to lr_final # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards) diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml index 78357dcc..e6b8fcf6 100644 --- a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml @@ -84,7 +84,7 @@ trainer: filename: null # Save the top/last K checkpoints - save_top_k: 2 + save_top_k: 3 # Choose by the most recent checkpoints (time based) monitor: 'step' mode: max @@ -187,18 +187,18 @@ model: # This allows the training of extreamly large context length (eg. 
100k),
 #   without eating up too much vram by keeping the training context length
 #   to a resonable number sutible to the current GPU setup
-  ctx_len: 2048
+  ctx_len: 8192
 
   # Learning rate of the training process
   # ---
   # Initia learning rate of the process
-  lr_init: 4e-4
+  lr_init: 2e-4
 
   # Final learning rate after the learning rate period
   # learning rate will stay at final value from then onwards
   #
   # NOTE: lr_final / lr_period does not work with warmup_steps
   #       and will be ignored (or replaced) with the warmup_steps logic instead
-  lr_final: 3e-4
+  lr_final: 1e-4
 
   # Number of epoch to reduce the learning rate from lr_init to lr_final
   #  1 means a single epoch (so lr would be lr_final from epoch 2 onwards)

From dc99b9b62249e84f00ccc7e665282de249ebca25 Mon Sep 17 00:00:00 2001
From: "Eugene Cheah (picocreator)"
Date: Tue, 23 Jan 2024 22:16:45 +0000
Subject: [PATCH 22/23] 1.5b and 3b runs

---
 .../memory-test/World-1B5-mem-finetune.ipynb  |   12 +-
 .../memory-test/World-3B-mem-finetune.ipynb   | 2590 +++++++++++++++++
 2 files changed, 2596 insertions(+), 6 deletions(-)
 create mode 100644 notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb

diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb
index 7bd2747b..9a9cf515 100644
--- a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb
+++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb
@@ -176,7 +176,7 @@
     "tags": []
    },
    "source": [
-    "## Finetune 1 (0 -> 4k) : Dataset preperation\n",
+    "## Finetune 1 (0 -> 2x2k) : Dataset preparation\n",
     "\n",
     "Stage 1, handles total context size of 2048. Meaning it will be tuned for memory task of 1 to approximately 1024 tokens of size."
    ]
@@ -328,7 +328,7 @@
     "tags": []
    },
    "source": [
-    "## Finetune 1 (0 -> 4k) : The actual tune!"
+    "## Finetune 1 (0 -> 2x2k) : The actual tune!"
    ]
   },
   {
@@ -429,7 +429,7 @@
    "id": "8259d9b5",
    "metadata": {},
    "source": [
-    "## Finetune 1 (0 -> 2*2k) : The actual tune!"
+    "## Finetune 1 (0 -> 2x2k) : The actual tune!"
    ]
   },
@@ -485,7 +485,7 @@
    "id": "6fe5d71b",
    "metadata": {},
    "source": [
-    "## Finetune 2 (0 -> 2*4k) : Dataset preperation\n",
+    "## Finetune 2 (2x2k -> 2x4k) : Dataset preparation\n",
     "\n",
     "Stage 2, handles total context size of 8k. Meaning it will be tuned for memory task of approximately 4k tokens of size."
@@ -2272,7 +2272,7 @@
     "#\n",
     "# Ramping up the 3000+ - 400 words dataset\n",
     "# \n",
-    "for i in {3000..6000..25} \n",
+    "for i in {3025..6000..25} \n",
     "do\n",
     "    python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n",
     "    python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n",
     "done\n",
@@ -2325,7 +2325,7 @@
    "id": "60244472",
    "metadata": {},
    "source": [
-    "## Finetune 1 (0 -> 2*2k) : The actual tune!"
+    "## Finetune 2 (2x2k -> 2x4k) : The actual tune!"
    ]
   },
   {
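The papermill block recorded in the notebook metadata above (input_path `./World-1B5-mem-finetune.ipynb`, output_path `./World-1B5-mem-finetune.output.ipynb`, duration ~3696s) shows these notebooks are executed headlessly with papermill. A minimal runner sketch, using the exact paths from that metadata:

```python
# Headless notebook execution via papermill, matching the papermill metadata
# recorded in the notebook above (same input/output paths).
import papermill as pm

pm.execute_notebook(
    "./World-1B5-mem-finetune.ipynb",         # input_path from the metadata
    "./World-1B5-mem-finetune.output.ipynb",  # output_path from the metadata
)
```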
diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb
new file mode 100644
index 00000000..7e316ffa
--- /dev/null
+++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb
@@ -0,0 +1,2590 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "093065b5",
+   "metadata": {
+    "papermill": {
+     "duration": 0.005853,
+     "end_time": "2024-01-23T11:31:12.266593",
+     "exception": false,
+     "start_time": "2024-01-23T11:31:12.260740",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# RWKV World Memory Finetune (Memory Finetune)\n",
+    "\n",
+    "This takes an existing RWKV world model, and finetunes it specifically for the memory repeat task at various sizes.\n",
+    "This test is used to approximate the model's token memory size in the \"worst case scenario\"\n",
+    "\n",
+    "- Using randomized data, so prior learning does not help, nor is it possible to compress the data\n",
+    "- Using a variety of token lengths, to avoid overfitting to a single length\n",
+    "- Based on the pretrained model (rwkv world)\n",
+    "- This process does \"destroy the model\" but it helps quantify the model limits\n",
+    "\n",
+    "In practice however, the model may show an \"attention range\" longer than what is benchmarked, as natural text is highly compressible, unlike the purely randomized data being tested here.\n",
+    "\n",
+    "This runner has been optimized to run on 8 x 80GB vram nodes; you should allocate at least 1TB of disk space.\n",
+    "\n",
+    "> This project assumes you have the rwkv-infctx conda env setup, and you are executing in that environment - see the main README.md for the conda env setup steps"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0a0d2888",
+   "metadata": {
+    "papermill": {
+     "duration": 0.004809,
+     "end_time": "2024-01-23T11:31:12.276770",
+     "exception": false,
+     "start_time": "2024-01-23T11:31:12.271961",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Configure your environment settings\n",
+    "(!Important: you will need to rerun the cell below if you restart your kernel)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "e8b16f4a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-01-23T11:31:12.286314Z",
+     "iopub.status.busy": "2024-01-23T11:31:12.285325Z",
+     "iopub.status.idle": "2024-01-23T11:31:12.303712Z",
+     "shell.execute_reply": "2024-01-23T11:31:12.302559Z"
+    },
+    "papermill": {
+     "duration": 0.024597,
+     "end_time": "2024-01-23T11:31:12.306334",
+     "exception": false,
+     "start_time": "2024-01-23T11:31:12.281737",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DEEPSPEED_STRAT: deepspeed_stage_1\n",
+      "ENABLE_WANDB: True\n",
+      "GPU_DEVICES: auto\n",
+      "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test\n",
+      "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n",
+      "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n"
+     ]
+    }
+   ],
+   "source": [
+    "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n",
+    "GPU_DEVICES=\"auto\"\n",
+    "ENABLE_WANDB=True\n",
+    "WANDB_PREFIX=\"[8xA100] RWKV-v5-3B-World\"\n",
+    "\n",
+    "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n",
+    "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
+    "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
+    "\n",
+    "if ENABLE_WANDB:\n",
+    "    WANDB_MODE=\"online\"\n",
+    "else:\n",
+    "    WANDB_MODE=\"disabled\"\n",
+    "\n",
+    "# The model sizing\n",
+    "MODEL_NAME=\"RWKV-v5-3B-world.pth\"\n",
+    "MODEL_URL=\"https://huggingface.co/BlinkDL/rwkv-5-world/resolve/main/RWKV-5-World-3B-v2-20231118-ctx16k.pth?download=true\"\n",
+    "\n",
+    "# Computing the notebook, and various paths\n",
+    "import os\n",
+    "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
+    "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n",
+    "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
+    "MEMORY_SCRIPT_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./notebook/util-scripts/memory_script\"))\n",
+    "\n",
+    "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
+    "print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
+    "print(\"PROJECT_DIR:\", PROJECT_DIR)"
+   ]
+  },
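The wandb run names later in this notebook encode `bs=256`, while the trainer is launched with `--trainer.microbatch_size=8` across 8 GPUs. Assuming the infctx trainer fills the gap with gradient accumulation (an assumption about the trainer internals, not a documented API), the bookkeeping works out as follows:

```python
# Sanity-check of the batch-size arithmetic implied by the run names.
# The gradient-accumulation rule here is an assumption about the trainer,
# not something this notebook configures explicitly.
TARGET_BATCH_SIZE = 256  # the "bs=256" encoded in the wandb run name
NUM_GPUS = 8             # the "[8xA100]" prefix
MICROBATCH_SIZE = 8      # --trainer.microbatch_size=8

samples_per_substep = NUM_GPUS * MICROBATCH_SIZE            # 64 samples per forward/backward
grad_accum_steps = TARGET_BATCH_SIZE // samples_per_substep  # -> 4 accumulation steps
assert grad_accum_steps * samples_per_substep == TARGET_BATCH_SIZE
print("gradient accumulation steps:", grad_accum_steps)
```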
+  {
+   "cell_type": "markdown",
+   "id": "dc0dcc53",
+   "metadata": {
+    "papermill": {
+     "duration": 0.003834,
+     "end_time": "2024-01-23T11:31:12.316887",
+     "exception": false,
+     "start_time": "2024-01-23T11:31:12.313053",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Download the pretrained model\n",
+    "(if you want to skip the basemodel train + instruct tune)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db947b68",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-01-23T11:31:12.326806Z",
+     "iopub.status.busy": "2024-01-23T11:31:12.325636Z",
+     "iopub.status.idle": "2024-01-23T11:31:12.823116Z",
+     "shell.execute_reply": "2024-01-23T11:31:12.821743Z"
+    },
+    "papermill": {
+     "duration": 0.505449,
+     "end_time": "2024-01-23T11:31:12.826085",
+     "exception": false,
+     "start_time": "2024-01-23T11:31:12.320636",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Let's wget the model files\n",
+    "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n",
+    "!cd \"{PROJECT_DIR}/model\" && \\\n",
+    "    wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "e8c15352",
+   "metadata": {
+    "papermill": {
+     "duration": 0.005467,
+     "end_time": "2024-01-23T11:31:12.837257",
+     "exception": false,
+     "start_time": "2024-01-23T11:31:12.831790",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Finetune 1 (0 -> 4k) : Dataset preparation\n",
+    "\n",
+    "Stage 1 handles a total context size of 2048, meaning it will be tuned for memory tasks of 1 to approximately 1024 tokens."
+   ]
+  },
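The dataset cells below shell out to `memory_script/gen_limited_prompt_completion_jsonl.py`, which emits prompt/completion pairs asking the model to repeat a randomized word list. The exact prompt template lives in that script; the following is only an illustrative sketch of the record shape (the word pool and template below are placeholders, not the script's actual ones):

```python
# Illustrative sketch of a word-repetition JSONL record. The real generator
# is ./memory_script/gen_limited_prompt_completion_jsonl.py; the word pool
# and prompt template here are placeholders.
import json
import os
import random

WORDS = ["apple", "river", "stone", "cloud", "ember"]  # placeholder pool

def make_sample(word_count: int) -> dict:
    words = " ".join(random.choice(WORDS) for _ in range(word_count))
    return {
        "prompt": f"Instruction: Repeat this text exactly\n\nInput:\n{words}\n\nResponse:",
        "completion": f" {words}",
    }

os.makedirs("./dataset", exist_ok=True)
with open("./dataset/gen-word-10-count.jsonl", "w") as f:
    for _ in range(100):  # 100 samples, mirroring the generator invocations below
        f.write(json.dumps(make_sample(10)) + "\n")
```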
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "02bfca27",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-01-23T11:31:12.843769Z",
+     "iopub.status.busy": "2024-01-23T11:31:12.843411Z",
+     "iopub.status.idle": "2024-01-23T11:31:16.588281Z",
+     "shell.execute_reply": "2024-01-23T11:31:16.587130Z"
+    },
+    "papermill": {
+     "duration": 3.751098,
+     "end_time": "2024-01-23T11:31:16.591186",
+     "exception": false,
+     "start_time": "2024-01-23T11:31:12.840088",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Folder and eval pip setup\n",
+    "!cp -r \"{MEMORY_SCRIPT_DIR}/\" \"{NOTEBOOK_DIR}/\"\n",
+    "!python3 -m pip install rwkv asyncio aiocsv aiofiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c38e51c",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-01-23T11:31:16.604643Z",
+     "iopub.status.busy": "2024-01-23T11:31:16.604064Z",
+     "iopub.status.idle": "2024-01-23T11:31:19.825147Z",
+     "shell.execute_reply": "2024-01-23T11:31:19.823826Z"
+    },
+    "papermill": {
+     "duration": 3.231491,
+     "end_time": "2024-01-23T11:31:19.828814",
+     "exception": false,
+     "start_time": "2024-01-23T11:31:16.597323",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ./dataset\n",
+    "rm -rf ./dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word repetition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# Training set for < 100 words\n",
+    "# This is used to fill in as many blanks as possible\n",
+    "#\n",
+    "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n",
+    "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n",
+    "for i in {5..100..5} \n",
+    "do\n",
+    "    python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 150 & \n",
+    "    python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 100+ - 200 words dataset\n",
+    "# \n",
+    "for i in {110..200..10} \n",
+    "do\n",
+    "    python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 125 & \n",
+    "    python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 200+ - 4000 words dataset\n",
+    "# \n",
+    "for i in {210..4000..10} \n",
+    "do\n",
+    "    python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n",
+    "    python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -alh ./dataset/"
+   ]
+  },
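The next cell pre-tokenizes the generated JSONL files and packs them into fixed-length 8k datapacks. Reduced to its core, packing just concatenates tokenized samples and slices uniform blocks; a simplified sketch follows (the trainer's `preload_datapath.py` is the real implementation, which also handles padding and loss masking):

```python
# Minimal sketch of sample packing into fixed-length blocks; the trainer's
# preload_datapath.py implements the real version (with loss masks etc.).
from typing import Iterable, List

def pack_samples(token_lists: Iterable[List[int]], block_size: int = 8192) -> List[List[int]]:
    buffer: List[int] = []
    blocks: List[List[int]] = []
    for tokens in token_lists:
        buffer.extend(tokens)
        while len(buffer) >= block_size:
            blocks.append(buffer[:block_size])
            buffer = buffer[block_size:]
    if buffer:  # a trailing partial block would be padded in practice
        blocks.append(buffer)
    return blocks

print(len(pack_samples([[1] * 5000, [2] * 6000], block_size=8192)))  # -> 2 blocks
```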
"start_time": "2024-01-23T11:31:19.983582", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets pre tokenize the requried dataset\n", + "# and pack the data into 8k of length\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# Then to do 8k chunks, batchsize 8, with 16k datapacks. Why? I dun know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-1-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-1-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "82e837d9", + "metadata": { + "papermill": { + "duration": 0.113925, + "end_time": "2024-01-23T11:34:10.936645", + "exception": false, + "start_time": "2024-01-23T11:34:10.822720", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 4k) : The actual tune!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "127af572", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:34:11.167985Z", + "iopub.status.busy": "2024-01-23T11:34:11.167540Z", + "iopub.status.idle": "2024-01-23T13:03:10.590814Z", + "shell.execute_reply": "2024-01-23T13:03:10.588629Z" + }, + "papermill": { + "duration": 5339.542389, + "end_time": "2024-01-23T13:03:10.594019", + "exception": false, + "start_time": "2024-01-23T11:34:11.051630", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-1-tune.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-1 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=8 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81395227", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:03:10.911324Z", + "iopub.status.busy": "2024-01-23T13:03:10.910769Z", + "iopub.status.idle": "2024-01-23T13:03:39.572566Z", + "shell.execute_reply": "2024-01-23T13:03:39.571025Z" + }, + "papermill": { + "duration": 28.823213, + "end_time": "2024-01-23T13:03:39.575536", + "exception": false, + "start_time": "2024-01-23T13:03:10.752323", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "595a1b18", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:05:37.852892Z", + "iopub.status.busy": "2024-01-23T13:05:37.852266Z", + "iopub.status.idle": "2024-01-23T13:12:24.206962Z", + "shell.execute_reply": "2024-01-23T13:12:24.205778Z" + }, + "papermill": { + "duration": 406.525339, + "end_time": 
"2024-01-23T13:12:24.209784", + "exception": false, + "start_time": "2024-01-23T13:05:37.684445", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\" \"none\" 1000 3000" + ] + }, + { + "cell_type": "markdown", + "id": "bca9abd4", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : Dataset preperation\n", + "\n", + "Stage 2, handles total context size of 8k. Meaning it will be tuned for memory task of approximately 4k tokens of size." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8a4da53a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n", + "Generated JSONL file with - 5 max words, 100 samples - at ./dataset/gen-word-5-count.jsonl\n", + "Generated JSONL file with - 2 max words, 100 samples - at ./dataset/word-2-count.jsonl\n", + "Generated JSONL file with - 25 max words, 100 samples - at ./dataset/gen-word-25-count.jsonl\n", + "Generated JSONL file with - 45 max words, 100 samples - at ./dataset/gen-word-45-count.jsonl\n", + "Generated JSONL file with - 4 max words, 100 samples - at ./dataset/word-4-count.jsonl\n", + "Generated JSONL file with - 10 max words, 100 samples - at ./dataset/gen-word-10-count.jsonl\n", + "Generated JSONL file with - 15 max words, 100 samples - at ./dataset/gen-word-15-count.jsonl\n", + "Generated JSONL file with - 20 max words, 100 samples - at ./dataset/gen-word-20-count.jsonl\n", + "Generated JSONL file with - 40 max words, 100 samples - at ./dataset/gen-word-40-count.jsonl\n", + "Generated JSONL file with - 30 max words, 100 samples - at ./dataset/gen-word-30-count.jsonl\n", + "Generated JSONL file with - 70 max words, 100 samples - at ./dataset/gen-word-70-count.jsonl\n", + "Generated JSONL file with - 35 max words, 100 samples - at ./dataset/gen-word-35-count.jsonl\n", + "Generated JSONL file with - 50 max words, 100 samples - at ./dataset/gen-word-50-count.jsonl\n", + "Generated JSONL file with - 60 max words, 100 samples - at ./dataset/gen-word-60-count.jsonl\n", + "Generated JSONL file with - 90 max words, 100 samples - at ./dataset/gen-word-90-count.jsonl\n", + "Generated JSONL file with - 80 max words, 100 samples - at ./dataset/gen-word-80-count.jsonl\n", + "Generated JSONL file with - 85 max words, 100 samples - at ./dataset/gen-word-85-count.jsonl\n", + "Generated JSONL file with - 120 max words, 75 samples - at ./dataset/gen-word-120-count.jsonl\n", + "Generated JSONL file with - 65 max words, 100 samples - at ./dataset/gen-word-65-count.jsonl\n", + "Generated JSONL file with - 75 max words, 100 samples - at ./dataset/gen-word-75-count.jsonl\n", + "Generated JSONL file with - 55 max words, 100 samples - at ./dataset/gen-word-55-count.jsonl\n", + "Generated JSONL file with - 230 max words, 75 samples - at ./dataset/gen-word-230-count.jsonl\n", + "Generated JSONL file with - 360 max words, 75 samples - at ./dataset/gen-word-360-count.jsonl\n", + "Generated JSONL file with - 200 max words, 75 samples - at ./dataset/gen-word-200-count.jsonl\n", + "Generated JSONL file with - 260 max words, 75 samples - at ./dataset/gen-word-260-count.jsonl\n", + "Generated JSONL file with - 140 max words, 75 samples - at ./dataset/gen-word-140-count.jsonl\n", + 
"Generated JSONL file with - 110 max words, 75 samples - at ./dataset/gen-word-110-count.jsonl\n", + "Generated JSONL file with - 270 max words, 75 samples - at ./dataset/gen-word-270-count.jsonl\n", + "Generated JSONL file with - 460 max words, 75 samples - at ./dataset/gen-word-460-count.jsonl\n", + "Generated JSONL file with - 400 max words, 75 samples - at ./dataset/gen-word-400-count.jsonl\n", + "Generated JSONL file with - 300 max words, 75 samples - at ./dataset/gen-word-300-count.jsonl\n", + "Generated JSONL file with - 95 max words, 100 samples - at ./dataset/gen-word-95-count.jsonl\n", + "Generated JSONL file with - 280 max words, 75 samples - at ./dataset/gen-word-280-count.jsonl\n", + "Generated JSONL file with - 150 max words, 75 samples - at ./dataset/gen-word-150-count.jsonl\n", + "Generated JSONL file with - 160 max words, 75 samples - at ./dataset/gen-word-160-count.jsonl\n", + "Generated JSONL file with - 240 max words, 75 samples - at ./dataset/gen-word-240-count.jsonlGenerated JSONL file with - 190 max words, 75 samples - at ./dataset/gen-word-190-count.jsonl\n", + "\n", + "Generated JSONL file with - 580 max words, 75 samples - at ./dataset/gen-word-580-count.jsonl\n", + "Generated JSONL file with - 290 max words, 75 samples - at ./dataset/gen-word-290-count.jsonl\n", + "Generated JSONL file with - 210 max words, 75 samples - at ./dataset/gen-word-210-count.jsonl\n", + "Generated JSONL file with - 250 max words, 75 samples - at ./dataset/gen-word-250-count.jsonl\n", + "Generated JSONL file with - 130 max words, 75 samples - at ./dataset/gen-word-130-count.jsonl\n", + "Generated JSONL file with - 410 max words, 75 samples - at ./dataset/gen-word-410-count.jsonl\n", + "Generated JSONL file with - 180 max words, 75 samples - at ./dataset/gen-word-180-count.jsonl\n", + "Generated JSONL file with - 320 max words, 75 samples - at ./dataset/gen-word-320-count.jsonl\n", + "Generated JSONL file with - 170 max words, 75 samples - at ./dataset/gen-word-170-count.jsonl\n", + "Generated JSONL file with - 220 max words, 75 samples - at ./dataset/gen-word-220-count.jsonl\n", + "Generated JSONL file with - 370 max words, 75 samples - at ./dataset/gen-word-370-count.jsonl\n", + "Generated JSONL file with - 100 max words, 100 samples - at ./dataset/gen-word-100-count.jsonl\n", + "Generated JSONL file with - 450 max words, 75 samples - at ./dataset/gen-word-450-count.jsonl\n", + "Generated JSONL file with - 500 max words, 75 samples - at ./dataset/gen-word-500-count.jsonl\n", + "Generated JSONL file with - 310 max words, 75 samples - at ./dataset/gen-word-310-count.jsonl\n", + "Generated JSONL file with - 430 max words, 75 samples - at ./dataset/gen-word-430-count.jsonl\n", + "Generated JSONL file with - 420 max words, 75 samples - at ./dataset/gen-word-420-count.jsonl\n", + "Generated JSONL file with - 380 max words, 75 samples - at ./dataset/gen-word-380-count.jsonl\n", + "Generated JSONL file with - 530 max words, 75 samples - at ./dataset/gen-word-530-count.jsonl\n", + "Generated JSONL file with - 350 max words, 75 samples - at ./dataset/gen-word-350-count.jsonl\n", + "Generated JSONL file with - 510 max words, 75 samples - at ./dataset/gen-word-510-count.jsonl\n", + "Generated JSONL file with - 340 max words, 75 samples - at ./dataset/gen-word-340-count.jsonl\n", + "Generated JSONL file with - 560 max words, 75 samples - at ./dataset/gen-word-560-count.jsonl\n", + "Generated JSONL file with - 550 max words, 75 samples - at ./dataset/gen-word-550-count.jsonl\n", + "Generated JSONL 
file with - 330 max words, 75 samples - at ./dataset/gen-word-330-count.jsonl\n", + "Generated JSONL file with - 490 max words, 75 samples - at ./dataset/gen-word-490-count.jsonl\n", + "Generated JSONL file with - 470 max words, 75 samples - at ./dataset/gen-word-470-count.jsonl\n", + "Generated JSONL file with - 520 max words, 75 samples - at ./dataset/gen-word-520-count.jsonl\n", + "Generated JSONL file with - 440 max words, 75 samples - at ./dataset/gen-word-440-count.jsonl\n", + "Generated JSONL file with - 390 max words, 75 samples - at ./dataset/gen-word-390-count.jsonl\n", + "Generated JSONL file with - 480 max words, 75 samples - at ./dataset/gen-word-480-count.jsonl\n", + "Generated JSONL file with - 570 max words, 75 samples - at ./dataset/gen-word-570-count.jsonl\n", + "Generated JSONL file with - 680 max words, 75 samples - at ./dataset/gen-word-680-count.jsonl\n", + "Generated a single JSONL file with 680 samples (75 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", + "Generated a single JSONL file with 694 samples (75 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", + "Generated a single JSONL file with 527 samples (75 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", + "Generated JSONL file with - 590 max words, 75 samples - at ./dataset/gen-word-590-count.jsonl\n", + "Generated a single JSONL file with 685 samples (75 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", + "Generated JSONL file with - 610 max words, 75 samples - at ./dataset/gen-word-610-count.jsonl\n", + "Generated a single JSONL file with 532 samples (75 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", + "Generated a single JSONL file with 1005 samples (75 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", + "Generated a single JSONL file with 1034 samples (75 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", + "Generated a single JSONL file with 681 samples (75 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", + "Generated a single JSONL file with 523 samples (75 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", + "Generated a single JSONL file with 436 samples (75 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", + "Generated JSONL file with - 640 max words, 75 samples - at ./dataset/gen-word-640-count.jsonl\n", + "Generated a single JSONL file with 444 samples (75 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonlGenerated a single JSONL file with 789 samples (75 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", + "\n", + "Generated a single JSONL file with 305 samples (75 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonlGenerated a single JSONL file with 373 samples (75 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", + "Generated a single JSONL file with 686 samples (75 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", + "\n", + "Generated a single JSONL file with 699 samples (75 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", + "Generated JSONL file with - 690 max words, 75 samples - at ./dataset/gen-word-690-count.jsonl\n", + "Generated a 
single JSONL file with 1153 samples (75 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", + "Generated a single JSONL file with 534 samples (75 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", + "Generated a single JSONL file with 443 samples (75 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", + "Generated a single JSONL file with 4083 samples (100 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonlGenerated a single JSONL file with 375 samples (75 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", + "\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 390 max words - at ./dataset/shuffle-word-390-count.jsonl\n", + "Generated a single JSONL file with 432 samples (75 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", + "Generated a single JSONL file with 1491 samples (75 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", + "Generated a single JSONL file with 447 samples (75 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", + "Generated a single JSONL file with 1018 samples (75 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", + "Generated a single JSONL file with 724 samples (75 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", + "Generated a single JSONL file with 1287 samples (75 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", + "Generated a single JSONL file with 1098 samples (75 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", + "Generated a single JSONL file with 2650 samples (100 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", + "Generated JSONL file with - 1670 max words, 75 samples - at ./dataset/gen-word-1670-count.jsonl\n", + "Generated a single JSONL file with 1227 samples (75 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", + "Generated a single JSONL file with 3543 samples (100 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", + "Generated a single JSONL file with 2916 samples (100 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", + "Generated a single JSONL file with 439 samples (75 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", + "Generated JSONL file with - 1470 max words, 75 samples - at ./dataset/gen-word-1470-count.jsonl\n", + "Generated a single JSONL file with 4794 samples (100 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", + "Generated a single JSONL file with 1056 samples (75 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", + "Generated JSONL file with - 800 max words, 75 samples - at ./dataset/gen-word-800-count.jsonl\n", + "Generated JSONL file with - 630 max words, 75 samples - at ./dataset/gen-word-630-count.jsonl\n", + "Generated JSONL file with - 600 max words, 75 samples - at ./dataset/gen-word-600-count.jsonl\n", + "Generated a single JSONL file with 522 samples (75 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", + "Generated a single JSONL file with 3773 samples (100 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", + "Generated a 
single JSONL file with 3298 samples (100 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", + "Generated a single JSONL file with 3145 samples (100 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", + "Generated a single JSONL file with 525 samples (75 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", + "Generated JSONL file with - 620 max words, 75 samples - at ./dataset/gen-word-620-count.jsonl\n", + "Generated JSONL file with - 650 max words, 75 samples - at ./dataset/gen-word-650-count.jsonl\n", + "Generated a single JSONL file with 5860 samples (100 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", + "Generated a single JSONL file with 4379 samples (100 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", + "Generated a single JSONL file with 1382 samples (75 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", + "Generated a single JSONL file with 755 samples (75 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", + "Generated a single JSONL file with 747 samples (75 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", + "Generated JSONL file with - 540 max words, 75 samples - at ./dataset/gen-word-540-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", + "Generated a single JSONL file with 5255 samples (100 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", + "Generated a single JSONL file with 442 samples (75 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", + "Generated JSONL file with - 960 max words, 75 samples - at ./dataset/gen-word-960-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", + "Generated a single JSONL file with 435 samples (75 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", + "Generated JSONL file with - 1420 max words, 75 samples - at ./dataset/gen-word-1420-count.jsonl\n", + "Generated JSONL file with - 910 max words, 75 samples - at ./dataset/gen-word-910-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", + "Generated a single JSONL file with 6581 samples (100 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", + "Generated JSONL file with - 670 max words, 75 samples - at ./dataset/gen-word-670-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", + "Generated JSONL file with - 660 max words, 75 samples - at ./dataset/gen-word-660-count.jsonl\n", + "Generated a single JSONL file with 2798 samples (100 token repeat) - 95 max words - at 
./dataset/shuffle-word-95-count.jsonl\n", + "Generated JSONL file with - 740 max words, 75 samples - at ./dataset/gen-word-740-count.jsonl\n", + "Generated JSONL file with - 990 max words, 75 samples - at ./dataset/gen-word-990-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", + "Generated a single JSONL file with 8727 samples (100 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 600 max words - at ./dataset/shuffle-word-600-count.jsonl\n", + "Generated JSONL file with - 710 max words, 75 samples - at ./dataset/gen-word-710-count.jsonl\n", + "Generated JSONL file with - 950 max words, 75 samples - at ./dataset/gen-word-950-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", + "Generated JSONL file with - 1060 max words, 75 samples - at ./dataset/gen-word-1060-count.jsonl\n", + "Generated JSONL file with - 970 max words, 75 samples - at ./dataset/gen-word-970-count.jsonl\n", + "Generated a single JSONL file with 7513 samples (100 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", + "Generated JSONL file with - 980 max words, 75 samples - at ./dataset/gen-word-980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n", + "Generated JSONL file with - 1030 max words, 75 samples - at ./dataset/gen-word-1030-count.jsonl\n", + "Generated JSONL file with - 1000 max words, 75 samples - at ./dataset/gen-word-1000-count.jsonl\n", + "Generated JSONL file with - 1020 max words, 75 samples - at ./dataset/gen-word-1020-count.jsonl\n", + "Generated JSONL file with - 700 max words, 75 samples - at ./dataset/gen-word-700-count.jsonl\n", + "Generated JSONL file with - 720 max words, 75 samples - at ./dataset/gen-word-720-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", + "Generated JSONL file with - 1010 max words, 75 samples - at ./dataset/gen-word-1010-count.jsonl\n", + "Generated JSONL file with - 730 max words, 75 samples - at ./dataset/gen-word-730-count.jsonl\n", + "Generated a single JSONL file with 444 samples (75 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", + "Generated a single JSONL file with 435 samples (75 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", + "Generated a single JSONL file with 10623 samples (100 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", + "Generated a single JSONL file with 13076 samples (100 token repeat) - 20 max words - at 
./dataset/shuffle-word-20-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", + "Generated JSONL file with - 760 max words, 75 samples - at ./dataset/gen-word-760-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", + "Generated JSONL file with - 860 max words, 75 samples - at ./dataset/gen-word-860-count.jsonl\n", + "Generated JSONL file with - 940 max words, 75 samples - at ./dataset/gen-word-940-count.jsonl\n", + "Generated JSONL file with - 1880 max words, 75 samples - at ./dataset/gen-word-1880-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", + "Generated JSONL file with - 770 max words, 75 samples - at ./dataset/gen-word-770-count.jsonl\n", + "Generated a single JSONL file with 239 samples (75 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", + "Generated JSONL file with - 810 max words, 75 samples - at ./dataset/gen-word-810-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", + "Generated JSONL file with - 750 max words, 75 samples - at ./dataset/gen-word-750-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", + "Generated JSONL file with - 820 max words, 75 samples - at ./dataset/gen-word-820-count.jsonl\n", + "Generated JSONL file with - 1040 max words, 75 samples - at ./dataset/gen-word-1040-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonl\n", + "Generated JSONL file with - 840 max words, 75 samples - at ./dataset/gen-word-840-count.jsonl\n", + "Generated JSONL file with - 790 max words, 75 samples - at ./dataset/gen-word-790-count.jsonl\n", + "Generated a single JSONL file with 305 samples (75 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", + "Generated JSONL file with - 850 max words, 75 samples - at ./dataset/gen-word-850-count.jsonl\n", + "Generated JSONL file with - 1110 max words, 75 samples - at ./dataset/gen-word-1110-count.jsonl\n", + "Generated JSONL file with - 890 max words, 75 samples - at ./dataset/gen-word-890-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", + "Generated JSONL file with - 930 max words, 75 samples - at ./dataset/gen-word-930-count.jsonl\n", + "Generated JSONL file with - 1120 max words, 75 samples - at ./dataset/gen-word-1120-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", + "Generated a single JSONL file 
with 299 samples (75 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", + "Generated JSONL file with - 870 max words, 75 samples - at ./dataset/gen-word-870-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", + "Generated JSONL file with - 1180 max words, 75 samples - at ./dataset/gen-word-1180-count.jsonl\n", + "Generated JSONL file with - 920 max words, 75 samples - at ./dataset/gen-word-920-count.jsonl\n", + "Generated JSONL file with - 780 max words, 75 samples - at ./dataset/gen-word-780-count.jsonl\n", + "Generated a single JSONL file with 243 samples (75 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", + "Generated JSONL file with - 1640 max words, 75 samples - at ./dataset/gen-word-1640-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", + "Generated a single JSONL file with 241 samples (75 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", + "Generated JSONL file with - 830 max words, 75 samples - at ./dataset/gen-word-830-count.jsonl\n", + "Generated a single JSONL file with 17734 samples (100 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n", + "Generated JSONL file with - 1140 max words, 75 samples - at ./dataset/gen-word-1140-count.jsonl\n", + "Generated JSONL file with - 1150 max words, 75 samples - at ./dataset/gen-word-1150-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", + "Generated JSONL file with - 1070 max words, 75 samples - at ./dataset/gen-word-1070-count.jsonl\n", + "Generated a single JSONL file with 186 samples (75 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", + "Generated JSONL file with - 1700 max words, 75 samples - at ./dataset/gen-word-1700-count.jsonl\n", + "Generated JSONL file with - 2430 max words, 75 samples - at ./dataset/gen-word-2430-count.jsonl\n", + "Generated JSONL file with - 1200 max words, 75 samples - at ./dataset/gen-word-1200-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", + "Generated JSONL file with - 1360 max words, 75 samples - at ./dataset/gen-word-1360-count.jsonl\n", + "Generated JSONL file with - 1130 max words, 75 samples - at ./dataset/gen-word-1130-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", + "Generated a single JSONL file 
with 235 samples (75 token repeat) - 830 max words - at ./dataset/shuffle-word-830-count.jsonl\n", + "Generated JSONL file with - 2750 max words, 75 samples - at ./dataset/gen-word-2750-count.jsonl\n", + "Generated JSONL file with - 1230 max words, 75 samples - at ./dataset/gen-word-1230-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n", + "Generated a single JSONL file with 296 samples (75 token repeat) - 800 max words - at ./dataset/shuffle-word-800-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", + "Generated JSONL file with - 1860 max words, 75 samples - at ./dataset/gen-word-1860-count.jsonl\n", + "Generated JSONL file with - 1160 max words, 75 samples - at ./dataset/gen-word-1160-count.jsonl\n", + "Generated JSONL file with - 1320 max words, 75 samples - at ./dataset/gen-word-1320-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", + "Generated JSONL file with - 1930 max words, 75 samples - at ./dataset/gen-word-1930-count.jsonl\n", + "Generated JSONL file with - 880 max words, 75 samples - at ./dataset/gen-word-880-count.jsonl\n", + "Generated JSONL file with - 2360 max words, 75 samples - at ./dataset/gen-word-2360-count.jsonl\n", + "Generated a single JSONL file with 26117 samples (100 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", + "Generated JSONL file with - 1080 max words, 75 samples - at ./dataset/gen-word-1080-count.jsonl\n", + "Generated a single JSONL file with 94 samples (75 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n", + "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n", + "Generated JSONL file with - 1210 max words, 75 samples - at ./dataset/gen-word-1210-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n", + "Generated JSONL file with - 2300 max words, 75 samples - at ./dataset/gen-word-2300-count.jsonl\n", + "Generated JSONL file with - 1100 max words, 75 samples - at ./dataset/gen-word-1100-count.jsonl\n", + "Generated a single JSONL file with 221 samples (75 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n", + "Generated JSONL file with - 1220 max words, 75 samples - at ./dataset/gen-word-1220-count.jsonl\n", + 
"Generated a single JSONL file with 243 samples (75 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", + "Generated JSONL file with - 1170 max words, 75 samples - at ./dataset/gen-word-1170-count.jsonl\n", + "Generated JSONL file with - 1190 max words, 75 samples - at ./dataset/gen-word-1190-count.jsonl\n", + "Generated JSONL file with - 3900 max words, 100 samples - at ./dataset/gen-word-3900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n", + "Generated JSONL file with - 2530 max words, 75 samples - at ./dataset/gen-word-2530-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", + "Generated JSONL file with - 2970 max words, 75 samples - at ./dataset/gen-word-2970-count.jsonl\n", + "Generated JSONL file with - 900 max words, 75 samples - at ./dataset/gen-word-900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5350 max words - at ./dataset/shuffle-word-5350-count.jsonl\n", + "Generated a single JSONL file with 191 samples (75 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", + "Generated JSONL file with - 1050 max words, 75 samples - at ./dataset/gen-word-1050-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", + "Generated a single JSONL file with 223 samples (75 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", + "Generated JSONL file with - 1260 max words, 75 samples - at ./dataset/gen-word-1260-count.jsonl\n", + "Generated JSONL file with - 1280 max words, 75 samples - at ./dataset/gen-word-1280-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", + "Generated a single JSONL file with 196 samples (75 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", + "Generated JSONL file with - 1810 max words, 75 samples - at ./dataset/gen-word-1810-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", + "Generated JSONL file with - 1290 max words, 75 samples - at ./dataset/gen-word-1290-count.jsonl\n", + "Generated JSONL file with - 1250 max words, 75 samples - at ./dataset/gen-word-1250-count.jsonl\n", + "Generated JSONL file with - 1090 max words, 75 samples - at ./dataset/gen-word-1090-count.jsonl\n", + "Generated JSONL file with - 1310 max words, 75 samples - at ./dataset/gen-word-1310-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1190 max words - at ./dataset/shuffle-word-1190-count.jsonl\n", + "Generated JSONL file with - 1270 max words, 75 samples - at ./dataset/gen-word-1270-count.jsonl\n", + 
"Generated JSONL file with - 1240 max words, 75 samples - at ./dataset/gen-word-1240-count.jsonl\n", + "Generated JSONL file with - 1300 max words, 75 samples - at ./dataset/gen-word-1300-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", + "Generated JSONL file with - 1350 max words, 75 samples - at ./dataset/gen-word-1350-count.jsonl\n", + "Generated a single JSONL file with 186 samples (75 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", + "Generated a single JSONL file with 193 samples (75 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", + "Generated JSONL file with - 1480 max words, 75 samples - at ./dataset/gen-word-1480-count.jsonl\n", + "Generated JSONL file with - 1340 max words, 75 samples - at ./dataset/gen-word-1340-count.jsonl\n", + "Generated a single JSONL file with 191 samples (75 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", + "Generated JSONL file with - 1370 max words, 75 samples - at ./dataset/gen-word-1370-count.jsonl\n", + "Generated JSONL file with - 1330 max words, 75 samples - at ./dataset/gen-word-1330-count.jsonl\n", + "Generated a single JSONL file with 178 samples (75 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", + "Generated JSONL file with - 1440 max words, 75 samples - at ./dataset/gen-word-1440-count.jsonl\n", + "Generated JSONL file with - 1380 max words, 75 samples - at ./dataset/gen-word-1380-count.jsonl\n", + "Generated a single JSONL file with 185 samples (75 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", + "Generated JSONL file with - 1390 max words, 75 samples - at ./dataset/gen-word-1390-count.jsonl\n", + "Generated JSONL file with - 1410 max words, 75 samples - at ./dataset/gen-word-1410-count.jsonl\n", + "Generated a single JSONL file with 187 samples (75 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", + "Generated JSONL file with - 1450 max words, 75 samples - at ./dataset/gen-word-1450-count.jsonl\n", + "Generated JSONL file with - 1460 max words, 75 samples - at ./dataset/gen-word-1460-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", + "Generated JSONL file with - 1510 max words, 75 samples - at ./dataset/gen-word-1510-count.jsonl\n", + "Generated JSONL file with - 4875 max words, 100 samples - at ./dataset/gen-word-4875-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", + "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", + "Generated JSONL file with - 
1530 max words, 75 samples - at ./dataset/gen-word-1530-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", + "Generated JSONL file with - 1540 max words, 75 samples - at ./dataset/gen-word-1540-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", + "Generated a single JSONL file with 154 samples (75 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", + "Generated a single JSONL file with 192 samples (75 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", + "Generated JSONL file with - 1520 max words, 75 samples - at ./dataset/gen-word-1520-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", + "Generated JSONL file with - 1560 max words, 75 samples - at ./dataset/gen-word-1560-count.jsonl\n", + "Generated JSONL file with - 1550 max words, 75 samples - at ./dataset/gen-word-1550-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n", + "Generated a single JSONL file with 88 samples (75 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n", + "Generated JSONL file with - 1400 max words, 75 samples - at ./dataset/gen-word-1400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", + "Generated JSONL file with - 1430 max words, 75 samples - at ./dataset/gen-word-1430-count.jsonl\n", + "Generated JSONL file with - 1600 max words, 75 samples - at ./dataset/gen-word-1600-count.jsonl\n", + "Generated JSONL file with - 1730 max words, 75 samples - at ./dataset/gen-word-1730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", + "Generated JSONL file with - 1590 max words, 75 samples - at ./dataset/gen-word-1590-count.jsonl\n", + "Generated JSONL file with - 1500 max words, 75 samples - at ./dataset/gen-word-1500-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n", + "Generated a single JSONL file with 155 samples (75 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", + "Generated JSONL file with - 1490 max words, 75 samples - at ./dataset/gen-word-1490-count.jsonl\n", + "Generated JSONL file with - 1570 max words, 75 samples - at ./dataset/gen-word-1570-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1550 max words - at 
./dataset/shuffle-word-1550-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", + "Generated JSONL file with - 1630 max words, 75 samples - at ./dataset/gen-word-1630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1910 max words - at ./dataset/shuffle-word-1910-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", + "Generated JSONL file with - 1950 max words, 75 samples - at ./dataset/gen-word-1950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", + "Generated JSONL file with - 1610 max words, 75 samples - at ./dataset/gen-word-1610-count.jsonl\n", + "Generated JSONL file with - 1680 max words, 75 samples - at ./dataset/gen-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n", + "Generated JSONL file with - 2000 max words, 75 samples - at ./dataset/gen-word-2000-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonlGenerated a single JSONL file with 150 samples (75 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n", + "\n", + "Generated JSONL file with - 1650 max words, 75 samples - at ./dataset/gen-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n", + "Generated JSONL file with - 2100 max words, 75 samples - at ./dataset/gen-word-2100-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", + "Generated JSONL file with - 1580 max words, 75 samples - at ./dataset/gen-word-1580-count.jsonl\n", + "Generated JSONL file with - 1740 max words, 75 samples - at ./dataset/gen-word-1740-count.jsonl\n", + "Generated JSONL file with - 1690 max words, 75 samples - at ./dataset/gen-word-1690-count.jsonl\n", + "Generated JSONL file with - 2270 max words, 75 samples - at ./dataset/gen-word-2270-count.jsonl\n", + "Generated JSONL file with - 1660 max words, 75 samples - at ./dataset/gen-word-1660-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n", + "Generated JSONL file with - 1750 max words, 75 samples - at ./dataset/gen-word-1750-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n", + "Generated JSONL file 
with - 1720 max words, 75 samples - at ./dataset/gen-word-1720-count.jsonl\n", + "Generated JSONL file with - 1620 max words, 75 samples - at ./dataset/gen-word-1620-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", + "Generated JSONL file with - 1770 max words, 75 samples - at ./dataset/gen-word-1770-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n", + "Generated JSONL file with - 1780 max words, 75 samples - at ./dataset/gen-word-1780-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1650 max words - at ./dataset/shuffle-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n", + "Generated a single JSONL file with 120 samples (75 token repeat) - 2520 max words - at ./dataset/shuffle-word-2520-count.jsonl\n", + "Generated JSONL file with - 2650 max words, 75 samples - at ./dataset/gen-word-2650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words - at ./dataset/shuffle-word-3450-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1930 max words - at ./dataset/shuffle-word-1930-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n", + "Generated JSONL file with - 2810 max words, 75 samples - at ./dataset/gen-word-2810-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n", + "Generated a single JSONL file with 55780 samples (100 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n", + "Generated JSONL file with - 1710 max words, 75 samples - at ./dataset/gen-word-1710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n", + "Generated JSONL file with - 2800 max words, 75 samples - at ./dataset/gen-word-2800-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2330 
max words - at ./dataset/shuffle-word-2330-count.jsonl\n", + "Generated JSONL file with - 2990 max words, 75 samples - at ./dataset/gen-word-2990-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4175 max words - at ./dataset/shuffle-word-4175-count.jsonl\n", + "Generated JSONL file with - 1960 max words, 75 samples - at ./dataset/gen-word-1960-count.jsonl\n", + "Generated JSONL file with - 2740 max words, 75 samples - at ./dataset/gen-word-2740-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n", + "Generated JSONL file with - 2290 max words, 75 samples - at ./dataset/gen-word-2290-count.jsonl\n", + "Generated JSONL file with - 1800 max words, 75 samples - at ./dataset/gen-word-1800-count.jsonl\n", + "Generated JSONL file with - 1840 max words, 75 samples - at ./dataset/gen-word-1840-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", + "Generated JSONL file with - 1760 max words, 75 samples - at ./dataset/gen-word-1760-count.jsonl\n", + "Generated a single JSONL file with 92 samples (75 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n", + "Generated JSONL file with - 1820 max words, 75 samples - at ./dataset/gen-word-1820-count.jsonl\n", + "Generated JSONL file with - 1790 max words, 75 samples - at ./dataset/gen-word-1790-count.jsonl\n", + "Generated JSONL file with - 1830 max words, 75 samples - at ./dataset/gen-word-1830-count.jsonl\n", + "Generated JSONL file with - 1850 max words, 75 samples - at ./dataset/gen-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n", + "Generated JSONL file with - 1890 max words, 75 samples - at ./dataset/gen-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", + "Generated JSONL file with - 2940 max words, 75 samples - at ./dataset/gen-word-2940-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n", + "Generated JSONL file with - 1900 max words, 75 samples - at ./dataset/gen-word-1900-count.jsonl\n", + "Generated a single JSONL file with 81 samples (75 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n", + "Generated JSONL file with - 1910 max words, 75 samples - at ./dataset/gen-word-1910-count.jsonl\n", + "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4275 max words - at ./dataset/shuffle-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4800 max words - at 
./dataset/shuffle-word-4800-count.jsonl\n", + "Generated JSONL file with - 1920 max words, 75 samples - at ./dataset/gen-word-1920-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n", + "Generated JSONL file with - 2030 max words, 75 samples - at ./dataset/gen-word-2030-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n", + "Generated JSONL file with - 2470 max words, 75 samples - at ./dataset/gen-word-2470-count.jsonl\n", + "Generated JSONL file with - 2020 max words, 75 samples - at ./dataset/gen-word-2020-count.jsonl\n", + "Generated JSONL file with - 2180 max words, 75 samples - at ./dataset/gen-word-2180-count.jsonl\n", + "Generated JSONL file with - 1870 max words, 75 samples - at ./dataset/gen-word-1870-count.jsonl\n", + "Generated JSONL file with - 1970 max words, 75 samples - at ./dataset/gen-word-1970-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n", + "Generated JSONL file with - 2350 max words, 75 samples - at ./dataset/gen-word-2350-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n", + "Generated JSONL file with - 2010 max words, 75 samples - at ./dataset/gen-word-2010-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n", + "Generated JSONL file with - 1990 max words, 75 samples - at ./dataset/gen-word-1990-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n", + "Generated JSONL file with - 1940 max words, 75 samples - at ./dataset/gen-word-1940-count.jsonl\n", + "Generated JSONL file with - 4650 max words, 100 samples - at ./dataset/gen-word-4650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonl\n", + "Generated JSONL file with - 5100 max words, 100 samples - at ./dataset/gen-word-5100-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2000 max words - at ./dataset/shuffle-word-2000-count.jsonl\n", + "Generated JSONL file with - 2120 max words, 75 samples - at ./dataset/gen-word-2120-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 
token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n", + "Generated JSONL file with - 4300 max words, 100 samples - at ./dataset/gen-word-4300-count.jsonl\n", + "Generated JSONL file with - 2070 max words, 75 samples - at ./dataset/gen-word-2070-count.jsonl\n", + "Generated JSONL file with - 2080 max words, 75 samples - at ./dataset/gen-word-2080-count.jsonl\n", + "Generated JSONL file with - 2190 max words, 75 samples - at ./dataset/gen-word-2190-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1870 max words - at ./dataset/shuffle-word-1870-count.jsonl\n", + "Generated JSONL file with - 2110 max words, 75 samples - at ./dataset/gen-word-2110-count.jsonl\n", + "Generated JSONL file with - 1980 max words, 75 samples - at ./dataset/gen-word-1980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2100 max words - at ./dataset/shuffle-word-2100-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2090 max words - at ./dataset/shuffle-word-2090-count.jsonl\n", + "Generated JSONL file with - 2060 max words, 75 samples - at ./dataset/gen-word-2060-count.jsonl\n", + "Generated JSONL file with - 2160 max words, 75 samples - at ./dataset/gen-word-2160-count.jsonl\n", + "Generated JSONL file with - 2040 max words, 75 samples - at ./dataset/gen-word-2040-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonl\n", + "Generated JSONL file with - 2170 max words, 75 samples - at ./dataset/gen-word-2170-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n", + "Generated JSONL file with - 4475 max words, 100 samples - at ./dataset/gen-word-4475-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2140 max words - at ./dataset/shuffle-word-2140-count.jsonl\n", + "Generated JSONL file with - 2250 max words, 75 samples - at ./dataset/gen-word-2250-count.jsonl\n", + "Generated JSONL file with - 2710 max words, 75 samples - at ./dataset/gen-word-2710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n", + "Generated JSONL file with - 2590 max words, 75 samples - at ./dataset/gen-word-2590-count.jsonl\n", + "Generated JSONL file with - 2280 max words, 75 samples - at ./dataset/gen-word-2280-count.jsonl\n", + "Generated JSONL file with - 2200 max words, 75 samples - at ./dataset/gen-word-2200-count.jsonl\n", + "Generated JSONL file with - 2130 max words, 75 samples - at ./dataset/gen-word-2130-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n", + "Generated JSONL file with - 2260 max words, 75 samples - at ./dataset/gen-word-2260-count.jsonl\n", + 
"Generated JSONL file with - 2340 max words, 75 samples - at ./dataset/gen-word-2340-count.jsonl\n", + "Generated JSONL file with - 2230 max words, 75 samples - at ./dataset/gen-word-2230-count.jsonl\n", + "Generated JSONL file with - 5200 max words, 100 samples - at ./dataset/gen-word-5200-count.jsonl\n", + "Generated JSONL file with - 2400 max words, 75 samples - at ./dataset/gen-word-2400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n", + "Generated JSONL file with - 2140 max words, 75 samples - at ./dataset/gen-word-2140-count.jsonl\n", + "Generated JSONL file with - 2580 max words, 75 samples - at ./dataset/gen-word-2580-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2280 max words - at ./dataset/shuffle-word-2280-count.jsonl\n", + "Generated JSONL file with - 2450 max words, 75 samples - at ./dataset/gen-word-2450-count.jsonl\n", + "Generated a single JSONL file with 120 samples (75 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n", + "Generated JSONL file with - 2370 max words, 75 samples - at ./dataset/gen-word-2370-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonl\n", + "Generated JSONL file with - 2380 max words, 75 samples - at ./dataset/gen-word-2380-count.jsonl\n", + "Generated JSONL file with - 2220 max words, 75 samples - at ./dataset/gen-word-2220-count.jsonl\n", + "Generated JSONL file with - 2330 max words, 75 samples - at ./dataset/gen-word-2330-count.jsonl\n", + "Generated JSONL file with - 2240 max words, 75 samples - at ./dataset/gen-word-2240-count.jsonl\n", + "Generated a single JSONL file with 136 samples (75 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n", + "Generated JSONL file with - 2210 max words, 75 samples - at ./dataset/gen-word-2210-count.jsonl\n", + "Generated JSONL file with - 2310 max words, 75 samples - at ./dataset/gen-word-2310-count.jsonl\n", + "Generated JSONL file with - 2410 max words, 75 samples - at ./dataset/gen-word-2410-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n", + "Generated JSONL file with - 2150 max words, 75 samples - at ./dataset/gen-word-2150-count.jsonl\n", + "Generated JSONL file with - 2560 max words, 75 samples - at ./dataset/gen-word-2560-count.jsonl\n", + "Generated a single JSONL file with 147 samples (75 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n", + "Generated JSONL file with - 2320 max words, 75 samples - at ./dataset/gen-word-2320-count.jsonl\n", + "Generated a single JSONL file with 135 samples (75 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n", + "Generated JSONL file with - 2980 max words, 75 samples - at ./dataset/gen-word-2980-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2400 max words - at 
./dataset/shuffle-word-2400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1940 max words - at ./dataset/shuffle-word-1940-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonl\n", + "Generated JSONL file with - 2390 max words, 75 samples - at ./dataset/gen-word-2390-count.jsonl\n", + "Generated JSONL file with - 2090 max words, 75 samples - at ./dataset/gen-word-2090-count.jsonl\n", + "Generated a single JSONL file with 138 samples (75 token repeat) - 2450 max words - at ./dataset/shuffle-word-2450-count.jsonl\n", + "Generated JSONL file with - 2050 max words, 75 samples - at ./dataset/gen-word-2050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5825 max words - at ./dataset/shuffle-word-5825-count.jsonl\n", + "Generated a single JSONL file with 138 samples (75 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n", + "Generated JSONL file with - 3475 max words, 100 samples - at ./dataset/gen-word-3475-count.jsonl\n", + "Generated a single JSONL file with 147 samples (75 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n", + "Generated JSONL file with - 5775 max words, 100 samples - at ./dataset/gen-word-5775-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n", + "Generated JSONL file with - 2930 max words, 75 samples - at ./dataset/gen-word-2930-count.jsonl\n", + "Generated JSONL file with - 2420 max words, 75 samples - at ./dataset/gen-word-2420-count.jsonl\n", + "Generated JSONL file with - 2790 max words, 75 samples - at ./dataset/gen-word-2790-count.jsonl\n", + "Generated JSONL file with - 5625 max words, 100 samples - at ./dataset/gen-word-5625-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n", + "Generated JSONL file with - 2440 max words, 75 samples - at ./dataset/gen-word-2440-count.jsonl\n", + "Generated JSONL file with - 2460 max words, 75 samples - at ./dataset/gen-word-2460-count.jsonl\n", + "Generated a single JSONL file with 114 samples (75 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n", + "Generated a single JSONL file with 86 samples (75 token repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n", + "Generated a single JSONL file with 115 samples (75 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n", + "Generated a single JSONL file with 117 samples (75 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n", + "Generated JSONL file with - 2570 max words, 75 samples - at ./dataset/gen-word-2570-count.jsonl\n", + "Generated a single JSONL file with 108 samples (75 token repeat) - 2560 max words - at 
./dataset/shuffle-word-2560-count.jsonl\n", + "Generated JSONL file with - 2490 max words, 75 samples - at ./dataset/gen-word-2490-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n", + "Generated a single JSONL file with 139 samples (75 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n", + "Generated a single JSONL file with 144 samples (75 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n", + "Generated a single JSONL file with 118 samples (75 token repeat) - 2600 max words - at ./dataset/shuffle-word-2600-count.jsonl\n", + "Generated JSONL file with - 2630 max words, 75 samples - at ./dataset/gen-word-2630-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n", + "Generated JSONL file with - 2540 max words, 75 samples - at ./dataset/gen-word-2540-count.jsonl\n", + "Generated JSONL file with - 2480 max words, 75 samples - at ./dataset/gen-word-2480-count.jsonl\n", + "Generated JSONL file with - 2600 max words, 75 samples - at ./dataset/gen-word-2600-count.jsonl\n", + "Generated JSONL file with - 2610 max words, 75 samples - at ./dataset/gen-word-2610-count.jsonl\n", + "Generated JSONL file with - 2510 max words, 75 samples - at ./dataset/gen-word-2510-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", + "Generated a single JSONL file with 142 samples (75 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n", + "Generated a single JSONL file with 87 samples (75 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n", + "Generated JSONL file with - 2620 max words, 75 samples - at ./dataset/gen-word-2620-count.jsonl\n", + "Generated a single JSONL file with 108 samples (75 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n", + "Generated JSONL file with - 2500 max words, 75 samples - at ./dataset/gen-word-2500-count.jsonl\n", + "Generated a single JSONL file with 113 samples (75 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3125 max words - at ./dataset/shuffle-word-3125-count.jsonl\n", + "Generated JSONL file with - 2820 max words, 75 samples - at ./dataset/gen-word-2820-count.jsonl\n", + "Generated JSONL file with - 2680 max words, 75 samples - at ./dataset/gen-word-2680-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n", + "Generated JSONL file with - 2760 max words, 75 samples - at ./dataset/gen-word-2760-count.jsonl\n", + 
"Generated JSONL file with - 2550 max words, 75 samples - at ./dataset/gen-word-2550-count.jsonl\n", + "Generated JSONL file with - 2640 max words, 75 samples - at ./dataset/gen-word-2640-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2500 max words - at ./dataset/shuffle-word-2500-count.jsonl\n", + "Generated JSONL file with - 2660 max words, 75 samples - at ./dataset/gen-word-2660-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n", + "Generated JSONL file with - 2520 max words, 75 samples - at ./dataset/gen-word-2520-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2890 max words - at ./dataset/shuffle-word-2890-count.jsonl\n", + "Generated JSONL file with - 2720 max words, 75 samples - at ./dataset/gen-word-2720-count.jsonl\n", + "Generated JSONL file with - 2830 max words, 75 samples - at ./dataset/gen-word-2830-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n", + "Generated JSONL file with - 2770 max words, 75 samples - at ./dataset/gen-word-2770-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4775 max words - at ./dataset/shuffle-word-4775-count.jsonl\n", + "Generated JSONL file with - 2690 max words, 75 samples - at ./dataset/gen-word-2690-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n", + "Generated JSONL file with - 3000 max words, 75 samples - at ./dataset/gen-word-3000-count.jsonl\n", + "Generated JSONL file with - 2950 max words, 75 samples - at ./dataset/gen-word-2950-count.jsonl\n", + "Generated JSONL file with - 2910 max words, 75 samples - at ./dataset/gen-word-2910-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n", + "Generated JSONL file with - 2860 max words, 75 samples - at ./dataset/gen-word-2860-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n", + "Generated a single JSONL file with 112 samples (75 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n", + "Generated JSONL file with - 2850 max words, 75 samples - at ./dataset/gen-word-2850-count.jsonl\n", + "Generated JSONL file with - 2890 max words, 75 samples - at ./dataset/gen-word-2890-count.jsonl\n", + "Generated JSONL file with - 2960 max words, 75 samples - at ./dataset/gen-word-2960-count.jsonl\n", + 
"Generated JSONL file with - 2900 max words, 75 samples - at ./dataset/gen-word-2900-count.jsonl\n", + "Generated JSONL file with - 2730 max words, 75 samples - at ./dataset/gen-word-2730-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4100 max words - at ./dataset/shuffle-word-4100-count.jsonl\n", + "Generated JSONL file with - 4050 max words, 100 samples - at ./dataset/gen-word-4050-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5025 max words - at ./dataset/shuffle-word-5025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n", + "Generated JSONL file with - 2780 max words, 75 samples - at ./dataset/gen-word-2780-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max words - at ./dataset/shuffle-word-3475-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2770 max words - at ./dataset/shuffle-word-2770-count.jsonl\n", + "Generated JSONL file with - 2670 max words, 75 samples - at ./dataset/gen-word-2670-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n", + "Generated JSONL file with - 2840 max words, 75 samples - at ./dataset/gen-word-2840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5150 max words - at ./dataset/shuffle-word-5150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n", + "Generated JSONL file with - 3100 max words, 100 samples - at ./dataset/gen-word-3100-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n", + "Generated a single JSONL file with 87 samples (75 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n", + "Generated JSONL file with - 2880 max words, 75 samples - at ./dataset/gen-word-2880-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n", + "Generated JSONL file with - 2920 max words, 75 
samples - at ./dataset/gen-word-2920-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", + "Generated JSONL file with - 2870 max words, 75 samples - at ./dataset/gen-word-2870-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n", + "Generated JSONL file with - 3200 max words, 100 samples - at ./dataset/gen-word-3200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n", + "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", + "Generated JSONL file with - 2700 max words, 75 samples - at ./dataset/gen-word-2700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n", + "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4675 max words - at ./dataset/shuffle-word-4675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", + "Generated JSONL file with - 3075 max words, 100 samples - at ./dataset/gen-word-3075-count.jsonl\n", + "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", + "Generated JSONL file with - 4225 max words, 100 samples - at ./dataset/gen-word-4225-count.jsonl\n", + "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", + "Generated JSONL file with - 3225 max words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n", + "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n", + "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n", + "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonl\n", + "Generated JSONL file with - 3675 max words, 100 samples - at ./dataset/gen-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n", + "Generated JSONL file with - 5025 max words, 100 samples - at ./dataset/gen-word-5025-count.jsonl\n", + "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", + "Generated JSONL file with - 3125 max words, 100 samples - at ./dataset/gen-word-3125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4975 max words - at 
./dataset/shuffle-word-4975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4525 max words - at ./dataset/shuffle-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4025 max words - at ./dataset/shuffle-word-4025-count.jsonl\n", + "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at ./dataset/shuffle-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 max words - at ./dataset/shuffle-word-3350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3625 max words - at ./dataset/shuffle-word-3625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at ./dataset/shuffle-word-3650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5850 max words - at ./dataset/shuffle-word-5850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4425 max words - at ./dataset/shuffle-word-4425-count.jsonl\n", + "Generated JSONL file with - 3650 max words, 100 samples - at ./dataset/gen-word-3650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4350 max words - at ./dataset/shuffle-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4050 max words - at ./dataset/shuffle-word-4050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4200 max words - at ./dataset/shuffle-word-4200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4075 max words - at ./dataset/shuffle-word-4075-count.jsonl\n", + "Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 6000 max words - at ./dataset/shuffle-word-6000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", + "Generated JSONL file with - 3575 max words, 100 samples - at ./dataset/gen-word-3575-count.jsonl\n", + "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n", + "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n", + "Generated JSONL file with - 3725 max words, 100 
samples - at ./dataset/gen-word-3725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4500 max words - at ./dataset/shuffle-word-4500-count.jsonl\n", + "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", + "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at ./dataset/shuffle-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4125 max words - at ./dataset/shuffle-word-4125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", + "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", + "Generated JSONL file with - 3600 max words, 100 samples - at ./dataset/gen-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4950 max words - at ./dataset/shuffle-word-4950-count.jsonl\n", + "Generated JSONL file with - 4150 max words, 100 samples - at ./dataset/gen-word-4150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5600 max words - at ./dataset/shuffle-word-5600-count.jsonl\n", + "Generated JSONL file with - 3775 max words, 100 samples - at ./dataset/gen-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4150 max words - at ./dataset/shuffle-word-4150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5750 max words - at ./dataset/shuffle-word-5750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3800 max words - at ./dataset/shuffle-word-3800-count.jsonl\n", + "Generated JSONL file with - 3975 max words, 100 samples - at ./dataset/gen-word-3975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4600 max words - at ./dataset/shuffle-word-4600-count.jsonl\n", + "Generated JSONL file with - 4350 max words, 100 samples - at ./dataset/gen-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3850 max words - at ./dataset/shuffle-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4850 max words - at ./dataset/shuffle-word-4850-count.jsonl\n", + "Generated JSONL file with - 4250 max words, 100 samples - at ./dataset/gen-word-4250-count.jsonl\n", + "Generated JSONL file with - 4975 max words, 100 samples - at ./dataset/gen-word-4975-count.jsonl\n", + "Generated JSONL file with - 5450 max words, 100 samples - at ./dataset/gen-word-5450-count.jsonl\n", + "Generated JSONL file with - 4900 max words, 100 samples - at ./dataset/gen-word-4900-count.jsonl\n", + "Generated JSONL file with - 4200 max words, 100 samples - at ./dataset/gen-word-4200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4300 max words - at 
./dataset/shuffle-word-4300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4375 max words - at ./dataset/shuffle-word-4375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4650 max words - at ./dataset/shuffle-word-4650-count.jsonl\n", + "Generated JSONL file with - 4375 max words, 100 samples - at ./dataset/gen-word-4375-count.jsonl\n", + "Generated JSONL file with - 3925 max words, 100 samples - at ./dataset/gen-word-3925-count.jsonl\n", + "Generated JSONL file with - 4025 max words, 100 samples - at ./dataset/gen-word-4025-count.jsonl\n", + "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", + "Generated JSONL file with - 3875 max words, 100 samples - at ./dataset/gen-word-3875-count.jsonl\n", + "Generated JSONL file with - 3850 max words, 100 samples - at ./dataset/gen-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4625 max words - at ./dataset/shuffle-word-4625-count.jsonl\n", + "Generated JSONL file with - 4100 max words, 100 samples - at ./dataset/gen-word-4100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4725 max words - at ./dataset/shuffle-word-4725-count.jsonl\n", + "Generated JSONL file with - 4400 max words, 100 samples - at ./dataset/gen-word-4400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4550 max words - at ./dataset/shuffle-word-4550-count.jsonl\n", + "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", + "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4475 max words - at ./dataset/shuffle-word-4475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4325 max words - at ./dataset/shuffle-word-4325-count.jsonl\n", + "Generated JSONL file with - 4075 max words, 100 samples - at ./dataset/gen-word-4075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4575 max words - at ./dataset/shuffle-word-4575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5125 max words - at ./dataset/shuffle-word-5125-count.jsonl\n", + "Generated JSONL file with - 4175 max words, 100 samples - at ./dataset/gen-word-4175-count.jsonl\n", + "Generated JSONL file with - 4825 max words, 100 samples - at ./dataset/gen-word-4825-count.jsonl\n", + "Generated JSONL file with - 5425 max words, 100 samples - at ./dataset/gen-word-5425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4400 max words - at ./dataset/shuffle-word-4400-count.jsonl\n", + "Generated JSONL file with - 4450 max words, 100 samples - at ./dataset/gen-word-4450-count.jsonl\n", + "Generated JSONL file with - 4125 max words, 100 samples - at ./dataset/gen-word-4125-count.jsonl\n", + "Generated JSONL file with - 4600 max words, 100 samples - at ./dataset/gen-word-4600-count.jsonl\n", + "Generated JSONL file with - 4625 max words, 100 samples - at ./dataset/gen-word-4625-count.jsonl\n", + "Generated JSONL file with - 4550 max words, 100 samples - at ./dataset/gen-word-4550-count.jsonl\n", + "Generated JSONL file with - 4325 max words, 100 samples - at ./dataset/gen-word-4325-count.jsonl\n", + "Generated JSONL file with - 4425 max words, 100 samples - at 
./dataset/gen-word-4425-count.jsonl\n", + "Generated JSONL file with - 4850 max words, 100 samples - at ./dataset/gen-word-4850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5550 max words - at ./dataset/shuffle-word-5550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4925 max words - at ./dataset/shuffle-word-4925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4825 max words - at ./dataset/shuffle-word-4825-count.jsonl\n", + "Generated JSONL file with - 4500 max words, 100 samples - at ./dataset/gen-word-4500-count.jsonl\n", + "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5300 max words - at ./dataset/shuffle-word-5300-count.jsonl\n", + "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4450 max words - at ./dataset/shuffle-word-4450-count.jsonl\n", + "Generated JSONL file with - 4675 max words, 100 samples - at ./dataset/gen-word-4675-count.jsonl\n", + "Generated JSONL file with - 4800 max words, 100 samples - at ./dataset/gen-word-4800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5525 max words - at ./dataset/shuffle-word-5525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4750 max words - at ./dataset/shuffle-word-4750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5000 max words - at ./dataset/shuffle-word-5000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4250 max words - at ./dataset/shuffle-word-4250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5375 max words - at ./dataset/shuffle-word-5375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5225 max words - at ./dataset/shuffle-word-5225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5050 max words - at ./dataset/shuffle-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5675 max words - at ./dataset/shuffle-word-5675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4875 max words - at ./dataset/shuffle-word-4875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5250 max words - at ./dataset/shuffle-word-5250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5700 max words - at ./dataset/shuffle-word-5700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5325 max words - at ./dataset/shuffle-word-5325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5100 max words - at ./dataset/shuffle-word-5100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5075 max words - at ./dataset/shuffle-word-5075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4900 max words - at ./dataset/shuffle-word-4900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4700 max words - at ./dataset/shuffle-word-4700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 
token repeat) - 5175 max words - at ./dataset/shuffle-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5400 max words - at ./dataset/shuffle-word-5400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5275 max words - at ./dataset/shuffle-word-5275-count.jsonl\n", + "Generated JSONL file with - 5050 max words, 100 samples - at ./dataset/gen-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5725 max words - at ./dataset/shuffle-word-5725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4225 max words - at ./dataset/shuffle-word-4225-count.jsonl\n", + "Generated JSONL file with - 4750 max words, 100 samples - at ./dataset/gen-word-4750-count.jsonl\n", + "Generated JSONL file with - 4525 max words, 100 samples - at ./dataset/gen-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5575 max words - at ./dataset/shuffle-word-5575-count.jsonl\n", + "Generated JSONL file with - 4725 max words, 100 samples - at ./dataset/gen-word-4725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5450 max words - at ./dataset/shuffle-word-5450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5475 max words - at ./dataset/shuffle-word-5475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5900 max words - at ./dataset/shuffle-word-5900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5625 max words - at ./dataset/shuffle-word-5625-count.jsonl\n", + "Generated JSONL file with - 4275 max words, 100 samples - at ./dataset/gen-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5925 max words - at ./dataset/shuffle-word-5925-count.jsonl\n", + "Generated JSONL file with - 5375 max words, 100 samples - at ./dataset/gen-word-5375-count.jsonl\n", + "Generated JSONL file with - 5175 max words, 100 samples - at ./dataset/gen-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5950 max words - at ./dataset/shuffle-word-5950-count.jsonl\n", + "Generated JSONL file with - 4700 max words, 100 samples - at ./dataset/gen-word-4700-count.jsonl\n", + "Generated JSONL file with - 4775 max words, 100 samples - at ./dataset/gen-word-4775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5500 max words - at ./dataset/shuffle-word-5500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5975 max words - at ./dataset/shuffle-word-5975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5775 max words - at ./dataset/shuffle-word-5775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5200 max words - at ./dataset/shuffle-word-5200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5875 max words - at ./dataset/shuffle-word-5875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5650 max words - at ./dataset/shuffle-word-5650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5800 max words - at ./dataset/shuffle-word-5800-count.jsonl\n", + "Generated JSONL file with - 4950 max words, 100 samples - at ./dataset/gen-word-4950-count.jsonl\n", + 
"Generated a single JSONL file with 100 samples (100 token repeat) - 5425 max words - at ./dataset/shuffle-word-5425-count.jsonl\n", + "Generated JSONL file with - 5300 max words, 100 samples - at ./dataset/gen-word-5300-count.jsonl\n", + "Generated JSONL file with - 5000 max words, 100 samples - at ./dataset/gen-word-5000-count.jsonl\n", + "Generated JSONL file with - 5075 max words, 100 samples - at ./dataset/gen-word-5075-count.jsonl\n", + "Generated JSONL file with - 5325 max words, 100 samples - at ./dataset/gen-word-5325-count.jsonl\n", + "Generated JSONL file with - 5250 max words, 100 samples - at ./dataset/gen-word-5250-count.jsonl\n", + "Generated JSONL file with - 4575 max words, 100 samples - at ./dataset/gen-word-4575-count.jsonl\n", + "Generated JSONL file with - 5400 max words, 100 samples - at ./dataset/gen-word-5400-count.jsonl\n", + "Generated JSONL file with - 5150 max words, 100 samples - at ./dataset/gen-word-5150-count.jsonl\n", + "Generated JSONL file with - 5875 max words, 100 samples - at ./dataset/gen-word-5875-count.jsonl\n", + "Generated JSONL file with - 5125 max words, 100 samples - at ./dataset/gen-word-5125-count.jsonl\n", + "Generated JSONL file with - 5725 max words, 100 samples - at ./dataset/gen-word-5725-count.jsonl\n", + "Generated JSONL file with - 5350 max words, 100 samples - at ./dataset/gen-word-5350-count.jsonl\n", + "Generated JSONL file with - 5225 max words, 100 samples - at ./dataset/gen-word-5225-count.jsonl\n", + "Generated JSONL file with - 5525 max words, 100 samples - at ./dataset/gen-word-5525-count.jsonl\n", + "Generated JSONL file with - 5500 max words, 100 samples - at ./dataset/gen-word-5500-count.jsonl\n", + "Generated JSONL file with - 5275 max words, 100 samples - at ./dataset/gen-word-5275-count.jsonl\n", + "Generated JSONL file with - 5600 max words, 100 samples - at ./dataset/gen-word-5600-count.jsonl\n", + "Generated JSONL file with - 5650 max words, 100 samples - at ./dataset/gen-word-5650-count.jsonl\n", + "Generated JSONL file with - 5550 max words, 100 samples - at ./dataset/gen-word-5550-count.jsonl\n", + "Generated JSONL file with - 4925 max words, 100 samples - at ./dataset/gen-word-4925-count.jsonl\n", + "Generated JSONL file with - 5575 max words, 100 samples - at ./dataset/gen-word-5575-count.jsonl\n", + "Generated JSONL file with - 5950 max words, 100 samples - at ./dataset/gen-word-5950-count.jsonl\n", + "Generated JSONL file with - 5825 max words, 100 samples - at ./dataset/gen-word-5825-count.jsonl\n", + "Generated JSONL file with - 5925 max words, 100 samples - at ./dataset/gen-word-5925-count.jsonl\n", + "Generated JSONL file with - 5475 max words, 100 samples - at ./dataset/gen-word-5475-count.jsonl\n", + "Generated JSONL file with - 5975 max words, 100 samples - at ./dataset/gen-word-5975-count.jsonl\n", + "Generated JSONL file with - 5900 max words, 100 samples - at ./dataset/gen-word-5900-count.jsonl\n", + "Generated JSONL file with - 5850 max words, 100 samples - at ./dataset/gen-word-5850-count.jsonl\n", + "Generated JSONL file with - 5700 max words, 100 samples - at ./dataset/gen-word-5700-count.jsonl\n", + "Generated JSONL file with - 6000 max words, 100 samples - at ./dataset/gen-word-6000-count.jsonl\n", + "Generated JSONL file with - 5750 max words, 100 samples - at ./dataset/gen-word-5750-count.jsonl\n", + "Generated JSONL file with - 5800 max words, 100 samples - at ./dataset/gen-word-5800-count.jsonl\n", + "Generated JSONL file with - 5675 max words, 100 samples - at 
./dataset/gen-word-5675-count.jsonl\n", + "## Done ##\n", + "total 1.8G\n", + "drwxr-xr-x 2 root root 40K Jan 23 20:09 .\n", + "drwxr-xr-x 5 root root 4.0K Jan 23 20:09 ..\n", + "-rw-r--r-- 1 root root 20K Jan 23 20:09 gen-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 106K Jan 23 20:09 gen-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 732K Jan 23 20:09 gen-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 746K Jan 23 20:09 gen-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 749K Jan 23 20:09 gen-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 757K Jan 23 20:09 gen-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 762K Jan 23 20:09 gen-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 785K Jan 23 20:09 gen-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 774K Jan 23 20:09 gen-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 787K Jan 23 20:09 gen-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 794K Jan 23 20:09 gen-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 809K Jan 23 20:09 gen-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 88K Jan 23 20:09 gen-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 805K Jan 23 20:09 gen-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 818K Jan 23 20:09 gen-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 817K Jan 23 20:09 gen-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 832K Jan 23 20:09 gen-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 844K Jan 23 20:09 gen-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 855K Jan 23 20:09 gen-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 852K Jan 23 20:09 gen-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 868K Jan 23 20:09 gen-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 871K Jan 23 20:09 gen-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 875K Jan 23 20:09 gen-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 93K Jan 23 20:09 gen-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 883K Jan 23 20:09 gen-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 881K Jan 23 20:09 gen-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 898K Jan 23 20:09 gen-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 902K Jan 23 20:09 gen-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 924K Jan 23 20:09 gen-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 916K Jan 23 20:09 gen-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 928K Jan 23 20:09 gen-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 939K Jan 23 20:09 gen-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 940K Jan 23 20:09 gen-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 956K Jan 23 20:09 gen-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 105K Jan 23 20:09 gen-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 945K Jan 23 20:09 gen-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 968K Jan 23 20:09 gen-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 968K Jan 23 20:09 gen-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 981K Jan 23 20:09 gen-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 981K Jan 23 20:09 gen-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 993K Jan 23 20:09 gen-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 1001K Jan 23 20:09 gen-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 1015K Jan 23 20:09 gen-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1014K Jan 23 20:09 gen-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1019K Jan 23 20:09 gen-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 109K Jan 23 20:09 
gen-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 1023K Jan 23 20:09 gen-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 25K Jan 23 20:09 gen-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 116K Jan 23 20:09 gen-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 127K Jan 23 20:09 gen-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 133K Jan 23 20:09 gen-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 140K Jan 23 20:09 gen-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 
gen-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 147K Jan 23 20:09 gen-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 30K Jan 23 20:09 gen-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 154K Jan 23 20:09 gen-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 162K Jan 23 20:09 gen-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 169K Jan 23 20:09 gen-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 
gen-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 176K Jan 23 20:09 gen-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 186K Jan 23 20:09 gen-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 34K Jan 23 20:09 gen-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 193K Jan 23 20:09 gen-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 200K Jan 23 20:09 gen-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 
gen-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 204K Jan 23 20:09 gen-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 213K Jan 23 20:09 gen-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 219K Jan 23 20:09 gen-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 40K Jan 23 20:09 gen-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 228K Jan 23 20:09 gen-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 gen-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 gen-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 239K Jan 23 20:09 gen-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 244K Jan 23 20:09 gen-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 
gen-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 247K Jan 23 20:09 gen-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 gen-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 gen-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 gen-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 261K Jan 23 20:09 gen-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 45K Jan 23 20:09 gen-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 265K Jan 23 20:09 gen-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 273K Jan 23 20:09 gen-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 283K Jan 23 20:09 gen-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 287K Jan 23 20:09 gen-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 294K Jan 23 20:09 gen-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 49K Jan 23 20:09 gen-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 301K Jan 23 20:09 gen-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 20:09 gen-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 20:09 gen-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 20:09 gen-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 304K Jan 23 20:09 gen-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 
gen-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 314K Jan 23 20:09 gen-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 319K Jan 23 20:09 gen-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 333K Jan 23 20:09 gen-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 54K Jan 23 20:09 gen-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 339K Jan 23 20:09 gen-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 348K Jan 23 20:09 gen-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 355K Jan 23 20:09 gen-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 359K Jan 23 20:09 gen-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 358K Jan 23 20:09 gen-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 20:09 gen-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 60K Jan 23 20:09 gen-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 372K Jan 23 20:09 gen-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 
gen-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 385K Jan 23 20:09 gen-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 389K Jan 23 20:09 gen-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 391K Jan 23 20:09 gen-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 398K Jan 23 20:09 gen-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 63K Jan 23 20:09 gen-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 411K Jan 23 20:09 gen-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 20:09 gen-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 20:09 gen-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 20:09 gen-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 423K Jan 23 20:09 gen-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 423K Jan 23 20:09 gen-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 433K Jan 23 20:09 gen-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 435K Jan 23 20:09 gen-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 20:09 gen-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 20:09 gen-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 5.8M Jan 23 20:09 gen-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 70K Jan 23 20:09 
gen-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 445K Jan 23 20:09 gen-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.8M Jan 23 20:09 gen-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 456K Jan 23 20:09 gen-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 455K Jan 23 20:09 gen-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 468K Jan 23 20:09 gen-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 474K Jan 23 20:09 gen-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 71K Jan 23 20:09 gen-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 486K Jan 23 20:09 gen-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 492K Jan 23 20:09 gen-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 492K Jan 23 20:09 gen-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 509K Jan 23 20:09 gen-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 507K Jan 23 20:09 gen-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 78K Jan 23 20:09 gen-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 518K Jan 23 20:09 gen-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 524K Jan 23 20:09 gen-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 530K Jan 23 20:09 gen-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 534K Jan 23 20:09 gen-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 552K Jan 23 20:09 gen-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 82K Jan 23 20:09 gen-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 555K Jan 23 20:09 gen-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 566K Jan 23 20:09 gen-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 575K Jan 23 20:09 gen-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 571K Jan 23 20:09 gen-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 581K Jan 23 20:09 gen-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 87K Jan 23 20:09 gen-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 593K Jan 23 20:09 gen-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 595K Jan 23 20:09 gen-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 601K Jan 23 20:09 gen-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 614K Jan 23 20:09 gen-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 623K Jan 23 20:09 gen-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 93K Jan 23 20:09 gen-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 631K Jan 23 20:09 gen-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 628K Jan 23 20:09 gen-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 645K Jan 23 20:09 gen-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 649K Jan 23 20:09 gen-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 656K Jan 23 20:09 gen-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 98K Jan 23 20:09 gen-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 667K Jan 23 20:09 gen-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 671K Jan 23 20:09 gen-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 680K Jan 23 20:09 gen-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 690K Jan 23 20:09 gen-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 692K Jan 23 20:09 gen-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 103K Jan 23 20:09 gen-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 694K Jan 23 20:09 gen-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 713K Jan 23 20:09 gen-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 714K Jan 23 20:09 gen-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 725K Jan 23 20:09 gen-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 732K Jan 23 20:09 gen-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 
23 20:09 shuffle-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 20:09 shuffle-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 shuffle-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 shuffle-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 shuffle-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-140-count.jsonl\n", + 
"-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 shuffle-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 
shuffle-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 shuffle-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2200-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.0M Jan 23 20:09 shuffle-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 shuffle-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 
shuffle-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 shuffle-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3025-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.5M Jan 23 20:09 shuffle-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 shuffle-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 
shuffle-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 shuffle-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 shuffle-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-480-count.jsonl\n", + "-rw-r--r-- 1 root 
root 2.5M Jan 23 20:09 shuffle-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 8.0M Jan 23 20:09 shuffle-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 shuffle-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 shuffle-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5650-count.jsonl\n", + 
"-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 shuffle-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-830-count.jsonl\n", + 
"-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 20:09 shuffle-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 20:09 shuffle-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 12K Jan 23 20:09 word-2-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 20:09 word-4-count.jsonl\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# Training set for <= 100 words\n", + "# This is used to fill up as much blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100+ - 3000 words dataset\n", + "# \n", + "for i in {110..3000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 75 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 75 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 3000+ - 400 words dataset\n", + "# \n", + "for i in {3025..6000..25} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e66e145e", + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Resolving data files: 100%|███████████████| 862/862 [00:00<00:00, 148632.68it/s]\n", + "Filter (num_proc=96): 16%|▊ | 48773/312109 [00:25<02:19, 1882.75 examples/s]\n", + "Map (num_proc=96): 100%|██████| 307741/307741 [00:05<00:00, 54820.73 examples/s]\n", + "Map (num_proc=96): 100%|███████| 307741/307741 [00:42<00:00, 7168.62 examples/s]\n", + "Map (num_proc=96): 100%|█████████| 36846/36846 [00:13<00:00, 2761.34 examples/s]\n", + "Saving the dataset (4/4 shards): 100%|█| 36846/36846 [00:01<00:00, 23444.71 exam\n", + "Saving the dataset (1/1 shards): 100%|█| 1547/1547 [00:00<00:00, 34203.75 exampl\n" + ] + } + ], + "source": [ + "# Lets pre tokenize the requried dataset\n", + "# and pack the data into 8k of length\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# Then to do 8k chunks, batchsize 8, with 16k datapacks. Why? I dun know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "6413a747", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : The actual tune!" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0bdba654", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-23 20:13:18,992] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-3B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-3B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=8192'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-3B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-3B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=8192'].\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 3745190225\n", + "Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 256\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 4\n", + " - effective_batch_size: 256\n", + "\n", + "[rank: 0] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-23 20:13:51,892] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:51,961] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,008] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,062] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,079] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,080] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,114] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 1] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 3] Seed set to 3745190225\n", + "[rank: 2] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 7] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 6] Seed set to 3745190225\n", + "[rank: 4] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 5] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 6] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 4] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 7] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 3] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 1] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 2] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 5] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240123_201446-ldvjsc3w\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8xA100] RWKV-v5-3B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/ldvjsc3w\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 2.000e-04 (0.0002)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.014776945114135742 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...Loading extension module fused_adam...\n", + "\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10196971893310547 seconds\n", + "Time to load fused_adam op: 0.1021115779876709 seconds\n", + "Time to load fused_adam op: 0.10188078880310059 seconds\n", + "Time to load fused_adam op: 0.10222482681274414 secondsTime to load fused_adam op: 0.10220718383789062 seconds\n", + "\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10234403610229492 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.1023564338684082 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 167 M \n", + "1 | blocks | ModuleList | 2.7 B \n", + "2 | ln_out | LayerNorm | 5.1 K \n", + "3 | head | Linear | 167 M \n", + "--------------------------------------\n", + "3.1 B Trainable params\n", + "0 Non-trainable params\n", + "3.1 B Total params\n", + "12,251.996Total estimated model params size (MB)\n", + "Epoch 0: 3%| | 18/576 [03:31<1:49:28, 0.08it/s, v_num=sc3w, train/loss=0.0732" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-2-tune.yaml\" \\\n", + " --model.load_model=\"../model/Memory-Tune-Stage-1-{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-2 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=8 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b479789d", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e248cba", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 1000 4000\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 4000 8000" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 6073.787154, + "end_time": "2024-01-23T13:12:24.967410", + "environment_variables": {}, + "exception": null, + "input_path": "./World-3B-mem-finetune.ipynb", + "output_path": "./World-3B-mem-finetune.output.ipynb", + "parameters": {}, + "start_time": "2024-01-23T11:31:11.180256", + "version": "2.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d57efd218d43c1637f2eeabe1db355d446d6c021 Mon Sep 17 00:00:00 2001 From: "Eugene Cheah (picocreator)" Date: Wed, 24 Jan 2024 03:21:32 +0000 Subject: [PATCH 23/23] 3B & 7B runs --- .../memory-test/World-3B-mem-finetune.ipynb | 497 ++-
.../memory-test/World-7B-mem-finetune.ipynb | 3274 +++++++++++++++++ .../v5-exp/memory-test/run-all-finetune.sh | 13 + 3 files changed, 3776 insertions(+), 8 deletions(-) create mode 100644 notebook/rwkv-x-exp/v5-exp/memory-test/World-7B-mem-finetune.ipynb create mode 100755 notebook/rwkv-x-exp/v5-exp/memory-test/run-all-finetune.sh diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb index 7e316ffa..6e506456 100644 --- a/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "e8b16f4a", "metadata": { "execution": { @@ -176,7 +176,7 @@ "tags": [] }, "source": [ - "## Finetune 1 (0 -> 4k) : Dataset preperation\n", + "## Finetune 1 (0 -> 2x2k) : Dataset preparation\n", "\n", "Stage 1, handles total context size of 2048. Meaning it will be tuned for memory task of 1 to approximately 1024 tokens of size." ] }, @@ -328,7 +328,7 @@ "tags": [] }, "source": [ - "## Finetune 1 (0 -> 4k) : The actual tune!" + "## Finetune 1 (0 -> 2x2k) : The actual tune!" ] }, { @@ -2506,7 +2506,254 @@ "0 Non-trainable params\n", "3.1 B Total params\n", "12,251.996Total estimated model params size (MB)\n", - "Epoch 0: 3%| | 18/576 [03:31<1:49:28, 0.08it/s, v_num=sc3w, train/loss=0.0732" + "Epoch 0: 17%|▏| 100/576 [18:37<1:28:41, 0.09it/s, v_num=sc3w, train/loss=0.007/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|█| 576/576 [1:37:18<00:00, 0.10it/s, v_num=sc3w, train/loss=1.270\n", + "Validation: | | 0/? [00:00 This project assumes you have the rwkv-infctx conda env setup, and you are executing in that environment - see the main README.md for the conda env setup steps" + ] + }, + { + "cell_type": "markdown", + "id": "f6aac483", + "metadata": { + "papermill": { + "duration": 0.004873, + "end_time": "2024-01-23T07:52:11.188157", + "exception": false, + "start_time": "2024-01-23T07:52:11.183284", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Configure your environment settings\n", + "(!Important: you will need to rerun the below cell, if you restart your kernel)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0c538903", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:11.196870Z", + "iopub.status.busy": "2024-01-23T07:52:11.196676Z", + "iopub.status.idle": "2024-01-23T07:52:11.207800Z", + "shell.execute_reply": "2024-01-23T07:52:11.206964Z" + }, + "papermill": { + "duration": 0.017119, + "end_time": "2024-01-23T07:52:11.210197", + "exception": false, + "start_time": "2024-01-23T07:52:11.193078", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_2\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_2\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "WANDB_PREFIX=\"[8xA100] RWKV-v5-7B-World\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# The model sizing\n", + "MODEL_NAME=\"RWKV-v5-7B-world.pth\"\n", + "MODEL_URL=\"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "MEMORY_SCRIPT_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./notebook/util-scripts/memory_script\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "markdown", + "id": "68a6f8e5", + "metadata": { + "papermill": { + "duration": 0.00356, + "end_time": "2024-01-23T07:52:11.220001", + "exception": false, + "start_time": "2024-01-23T07:52:11.216441", + "status": "completed" + }, + "tags": [] + }, + 
"source": [ + "## Download the pretrained model\n", + "(if you want to skip the the basemodel train + instruct tune)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d61f8a7a", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:11.226610Z", + "iopub.status.busy": "2024-01-23T07:52:11.225973Z", + "iopub.status.idle": "2024-01-23T07:52:11.717381Z", + "shell.execute_reply": "2024-01-23T07:52:11.716269Z" + }, + "papermill": { + "duration": 0.497049, + "end_time": "2024-01-23T07:52:11.719954", + "exception": false, + "start_time": "2024-01-23T07:52:11.222905", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets wget the model files\n", + "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n", + "!cd \"{PROJECT_DIR}/model\" && \\\n", + " wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "3928b63f", + "metadata": { + "papermill": { + "duration": 0.004645, + "end_time": "2024-01-23T07:52:11.730080", + "exception": false, + "start_time": "2024-01-23T07:52:11.725435", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 2*2k) : Dataset preperation\n", + "\n", + "Stage 1, handles total context size of 4096. Meaning it will be tuned for memory task of approximately 2k tokens of size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b100d015", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:11.737515Z", + "iopub.status.busy": "2024-01-23T07:52:11.736355Z", + "iopub.status.idle": "2024-01-23T07:52:15.468489Z", + "shell.execute_reply": "2024-01-23T07:52:15.467116Z" + }, + "papermill": { + "duration": 3.738786, + "end_time": "2024-01-23T07:52:15.471307", + "exception": false, + "start_time": "2024-01-23T07:52:11.732521", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Folder and eval pip setup\n", + "!cp -r \"{MEMORY_SCRIPT_DIR}/\" \"{NOTEBOOK_DIR}/\"\n", + "!python3 -m pip install rwkv asyncio aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a80b46d0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:15.483048Z", + "iopub.status.busy": "2024-01-23T07:52:15.482635Z", + "iopub.status.idle": "2024-01-23T07:52:18.671801Z", + "shell.execute_reply": "2024-01-23T07:52:18.670751Z" + }, + "papermill": { + "duration": 3.348865, + "end_time": "2024-01-23T07:52:18.826676", + "exception": false, + "start_time": "2024-01-23T07:52:15.477811", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# Training set for < 100 words\n", + "# This is used to fill up as much blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 150 & \n", + " python 
./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100+ to 200 words dataset\n", + "# \n", + "for i in {110..200..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 125 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 200+ to 4000 words dataset\n", + "# \n", + "for i in {210..4000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e0376d7", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:19.064200Z", + "iopub.status.busy": "2024-01-23T07:52:19.063812Z", + "iopub.status.idle": "2024-01-23T07:55:09.987257Z", + "shell.execute_reply": "2024-01-23T07:55:09.985659Z" + }, + "papermill": { + "duration": 171.009128, + "end_time": "2024-01-23T07:55:09.990364", + "exception": false, + "start_time": "2024-01-23T07:52:18.981236", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's pre-tokenize the required dataset\n", + "# and pack the data into 8k-length packs\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# Then to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-1-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-1-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "4a778ddb", + "metadata": { + "papermill": { + "duration": 0.114062, + "end_time": "2024-01-23T07:55:10.231871", + "exception": false, + "start_time": "2024-01-23T07:55:10.117809", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 2*2k) : The actual tune!"
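For context on the data the gen/shuffle calls above produce: each script writes one prompt/completion pair per JSONL line, sized by a target word count and a sample budget. The exact instruction wording and field names are internal to the memory_script helpers; the sketch below is only a minimal illustration of the record shape, with a stand-in vocabulary and hypothetical field names.

```python
# A minimal sketch of the kind of record the gen_limited_prompt_completion_jsonl.py
# calls above plausibly write: one prompt/completion pair per JSONL line, sized by
# a word-count target. The field names, instruction wording, and vocabulary here
# are illustrative assumptions, not the actual memory_script output.
import json
import random

WORDS = ["apple", "banana", "cherry", "delta", "echo", "falcon"]  # stand-in vocabulary

def make_sample(max_words: int) -> dict:
    n = random.randint(1, max_words)
    words = " ".join(random.choices(WORDS, k=n))
    return {
        "prompt": f"Instruction: repeat the input\n\nInput: {words}\n\nResponse:",
        "completion": " " + words,
    }

with open("word-sample.jsonl", "w") as f:
    for _ in range(3):
        f.write(json.dumps(make_sample(10)) + "\n")
```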
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2b4f921", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:55:10.463735Z", + "iopub.status.busy": "2024-01-23T07:55:10.463303Z", + "iopub.status.idle": "2024-01-23T11:18:37.403552Z", + "shell.execute_reply": "2024-01-23T11:18:37.402122Z" + }, + "papermill": { + "duration": 12207.060283, + "end_time": "2024-01-23T11:18:37.406917", + "exception": false, + "start_time": "2024-01-23T07:55:10.346634", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-1-tune.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-1 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=4 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae68ae18", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:18:37.833406Z", + "iopub.status.busy": "2024-01-23T11:18:37.832966Z", + "iopub.status.idle": "2024-01-23T11:19:35.452202Z", + "shell.execute_reply": "2024-01-23T11:19:35.450809Z" + }, + "papermill": { + "duration": 57.804367, + "end_time": "2024-01-23T11:19:35.454988", + "exception": false, + "start_time": "2024-01-23T11:18:37.650621", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30726953", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:22:23.370145Z", + "iopub.status.busy": "2024-01-23T11:22:23.369718Z", + "iopub.status.idle": "2024-01-23T11:31:09.313399Z", + "shell.execute_reply": "2024-01-23T11:31:09.312027Z" + }, + "papermill": { + "duration": 526.138711, + "end_time": "2024-01-23T11:31:09.316221", + "exception": false, + "start_time": "2024-01-23T11:22:23.177510", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\" \"none\" 1000 3000" + ] + }, + { + "cell_type": "markdown", + "id": "ddec3af2", + "metadata": {}, + "source": [ + "## Finetune 2 (0 -> 2*4k) : Dataset preparation\n", + "\n", + "Stage 2 handles a total context size of 8k, meaning it will be tuned for memory tasks of approximately 4k tokens in size."
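One detail worth spelling out from the stage-1 tune command above: the wandb run name advertises bs=256, but the trainer flag only sets microbatch_size=4. On the 8xA100 host these notebooks report, the missing factor is gradient accumulation. The back-of-envelope below makes the arithmetic explicit; the 256 target is taken from the run name, and the variable names are illustrative, not trainer flags.

```python
# Back-of-envelope check of the effective batch size for the tune above,
# assuming the bs=256 advertised in the wandb run name is the target.
target_batch_size = 256   # from the run name "bs=256"
num_gpus = 8              # 8xA100, per the WANDB_PREFIX and CUDA_VISIBLE_DEVICES logs
microbatch_size = 4       # --trainer.microbatch_size=4

accumulate_grad_batches = target_batch_size // (num_gpus * microbatch_size)
tokens_per_batch = target_batch_size * 8192  # --model.ctx_len=8192

print(accumulate_grad_batches)  # -> 8 gradient-accumulation steps per optimizer step
print(tokens_per_batch)         # -> 2,097,152 tokens per effective batch (upper bound)
```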
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b3cafb4", + "metadata": {}, + "outputs": [], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word repetition dataset ##\"\n", + "\n", + "#\n", + "# Training set for <= 100 words\n", + "# This is used to fill up as many blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100+ to 3000 words dataset\n", + "# \n", + "for i in {110..3000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 75 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 75 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 3000 to 6000 words dataset\n", + "# \n", + "for i in {3000..6000..25} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60e9c3ab", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's pre-tokenize the required dataset\n", + "# and pack the data into 8k-length packs\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# Then to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "2b1343c6", + "metadata": {}, + "source": [ + "## Finetune 2 (0 -> 2*4k) : The actual tune!"
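The pre-tokenize cells describe packing the data into 8k-length packs. As a rough illustration of what packing means here, a greedy packer concatenates tokenized samples until the next one would overflow the pack length, then starts a new pack. The sketch below assumes this simple greedy strategy; the trainer's actual datapack logic is driven by the stage-*.yaml configs and preload_datapath.py, and may differ.

```python
# Minimal sketch of greedy sequence packing into fixed-length "datapacks",
# illustrating the "pack the data into 8k-length packs" comment above.
# Illustration only -- the real packing is configured via stage-*.yaml and
# implemented by the trainer's data pipeline.
from typing import List

def pack_sequences(token_seqs: List[List[int]], pack_len: int = 8192) -> List[List[int]]:
    packs, current = [], []
    for seq in token_seqs:
        seq = seq[:pack_len]  # clip anything longer than one pack
        if len(current) + len(seq) > pack_len:
            packs.append(current)
            current = []
        current.extend(seq)
    if current:
        packs.append(current)
    return packs

# e.g. three short "documents": the first two fit one 8k slot, the third spills over
print(len(pack_sequences([[1] * 3000, [2] * 4000, [3] * 2000])))  # -> 2 packs
```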
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a120238c", + "metadata": {}, + "outputs": [], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-2-tune.yaml\" \\\n", + " --model.load_model=\"../model/Memory-Tune-Stage-1-{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-2 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=4 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d4041ad", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a793df6", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 1000 4000\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 4000 8000" + ] + }, + { + "cell_type": "markdown", + "id": "9cc1c2df", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : Dataset preparation\n", + "\n", + "Stage 2 handles a total context size of 8k, meaning it will be tuned for memory tasks of approximately 4k tokens in size."
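A note on the export step used throughout these notebooks: DeepSpeed writes last.ckpt as a directory of sharded parameter/optimizer states, and export_checkpoint.py consolidates it back into a single .pth weight file. The sketch below shows that general idea using DeepSpeed's stock zero_to_fp32 helper; this is an assumption about the mechanism, and the project's actual script may implement it differently.

```python
# Sketch of what a DeepSpeed checkpoint export typically does: consolidate the
# sharded "last.ckpt" directory into a single fp32 state dict, then save it as a
# plain .pth file. Assumes the stock DeepSpeed zero_to_fp32 helper; the project's
# export_checkpoint.py may differ in details. Paths below are placeholders.
import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

ckpt_dir = "../checkpoint/stage-2-memory-finetune/MODEL_NAME/last.ckpt"  # placeholder
state_dict = get_fp32_state_dict_from_zero_checkpoint(ckpt_dir)
torch.save(state_dict, "../model/Memory-Tune-Stage-2-MODEL_NAME")        # placeholder
```

The eval calls that precede this section then exercise the exported model; their trailing numeric arguments (1000 4000, then 4000 8000) appear to select the token-count range being evaluated.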
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bae4ec97", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n", + "Generated JSONL file with - 15 max words, 100 samples - at ./dataset/gen-word-15-count.jsonl\n", + "Generated JSONL file with - 4 max words, 100 samples - at ./dataset/word-4-count.jsonl\n", + "Generated JSONL file with - 2 max words, 100 samples - at ./dataset/word-2-count.jsonl\n", + "Generated JSONL file with - 5 max words, 100 samples - at ./dataset/gen-word-5-count.jsonlGenerated JSONL file with - 10 max words, 100 samples - at ./dataset/gen-word-10-count.jsonl\n", + "\n", + "Generated JSONL file with - 30 max words, 100 samples - at ./dataset/gen-word-30-count.jsonl\n", + "Generated JSONL file with - 35 max words, 100 samples - at ./dataset/gen-word-35-count.jsonl\n", + "Generated JSONL file with - 25 max words, 100 samples - at ./dataset/gen-word-25-count.jsonl\n", + "Generated JSONL file with - 20 max words, 100 samples - at ./dataset/gen-word-20-count.jsonl\n", + "Generated JSONL file with - 50 max words, 100 samples - at ./dataset/gen-word-50-count.jsonl\n", + "Generated JSONL file with - 40 max words, 100 samples - at ./dataset/gen-word-40-count.jsonl\n", + "Generated JSONL file with - 70 max words, 100 samples - at ./dataset/gen-word-70-count.jsonl\n", + "Generated JSONL file with - 80 max words, 100 samples - at ./dataset/gen-word-80-count.jsonl\n", + "Generated JSONL file with - 55 max words, 100 samples - at ./dataset/gen-word-55-count.jsonl\n", + "Generated JSONL file with - 75 max words, 100 samples - at ./dataset/gen-word-75-count.jsonl\n", + "Generated JSONL file with - 95 max words, 100 samples - at ./dataset/gen-word-95-count.jsonl\n", + "Generated JSONL file with - 45 max words, 100 samples - at ./dataset/gen-word-45-count.jsonl\n", + "Generated JSONL file with - 60 max words, 100 samples - at ./dataset/gen-word-60-count.jsonl\n", + "Generated JSONL file with - 120 max words, 75 samples - at ./dataset/gen-word-120-count.jsonl\n", + "Generated JSONL file with - 90 max words, 100 samples - at ./dataset/gen-word-90-count.jsonl\n", + "Generated JSONL file with - 100 max words, 100 samples - at ./dataset/gen-word-100-count.jsonl\n", + "Generated JSONL file with - 110 max words, 75 samples - at ./dataset/gen-word-110-count.jsonl\n", + "Generated JSONL file with - 130 max words, 75 samples - at ./dataset/gen-word-130-count.jsonl\n", + "Generated JSONL file with - 140 max words, 75 samples - at ./dataset/gen-word-140-count.jsonl\n", + "Generated JSONL file with - 85 max words, 100 samples - at ./dataset/gen-word-85-count.jsonl\n", + "Generated JSONL file with - 150 max words, 75 samples - at ./dataset/gen-word-150-count.jsonl\n", + "Generated JSONL file with - 180 max words, 75 samples - at ./dataset/gen-word-180-count.jsonl\n", + "Generated JSONL file with - 220 max words, 75 samples - at ./dataset/gen-word-220-count.jsonl\n", + "Generated JSONL file with - 250 max words, 75 samples - at ./dataset/gen-word-250-count.jsonl\n", + "Generated JSONL file with - 260 max words, 75 samples - at ./dataset/gen-word-260-count.jsonl\n", + "Generated JSONL file with - 65 max words, 100 samples - at ./dataset/gen-word-65-count.jsonl\n", + "Generated JSONL file with - 190 max words, 75 samples - at ./dataset/gen-word-190-count.jsonl\n", + "Generated JSONL file with - 200 max words, 75 samples - at ./dataset/gen-word-200-count.jsonl\n", + "Generated JSONL file with - 
170 max words, 75 samples - at ./dataset/gen-word-170-count.jsonl\n", + "Generated JSONL file with - 290 max words, 75 samples - at ./dataset/gen-word-290-count.jsonl\n", + "Generated JSONL file with - 240 max words, 75 samples - at ./dataset/gen-word-240-count.jsonl\n", + "Generated JSONL file with - 310 max words, 75 samples - at ./dataset/gen-word-310-count.jsonl\n", + "Generated JSONL file with - 380 max words, 75 samples - at ./dataset/gen-word-380-count.jsonl\n", + "Generated JSONL file with - 350 max words, 75 samples - at ./dataset/gen-word-350-count.jsonl\n", + "Generated JSONL file with - 330 max words, 75 samples - at ./dataset/gen-word-330-count.jsonl\n", + "Generated JSONL file with - 340 max words, 75 samples - at ./dataset/gen-word-340-count.jsonl\n", + "Generated JSONL file with - 230 max words, 75 samples - at ./dataset/gen-word-230-count.jsonl\n", + "Generated JSONL file with - 210 max words, 75 samples - at ./dataset/gen-word-210-count.jsonl\n", + "Generated JSONL file with - 160 max words, 75 samples - at ./dataset/gen-word-160-count.jsonl\n", + "Generated a single JSONL file with 1063 samples (75 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", + "Generated a single JSONL file with 1288 samples (75 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", + "Generated a single JSONL file with 1481 samples (75 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", + "Generated JSONL file with - 470 max words, 75 samples - at ./dataset/gen-word-470-count.jsonl\n", + "Generated a single JSONL file with 1381 samples (75 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", + "Generated a single JSONL file with 1158 samples (75 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", + "Generated a single JSONL file with 730 samples (75 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", + "Generated JSONL file with - 490 max words, 75 samples - at ./dataset/gen-word-490-count.jsonl\n", + "Generated a single JSONL file with 797 samples (75 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", + "Generated JSONL file with - 370 max words, 75 samples - at ./dataset/gen-word-370-count.jsonl\n", + "Generated JSONL file with - 360 max words, 75 samples - at ./dataset/gen-word-360-count.jsonl\n", + "Generated JSONL file with - 390 max words, 75 samples - at ./dataset/gen-word-390-count.jsonl\n", + "Generated a single JSONL file with 1215 samples (75 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", + "Generated a single JSONL file with 685 samples (75 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", + "Generated JSONL file with - 450 max words, 75 samples - at ./dataset/gen-word-450-count.jsonl\n", + "Generated JSONL file with - 530 max words, 75 samples - at ./dataset/gen-word-530-count.jsonl\n", + "Generated a single JSONL file with 707 samples (75 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", + "Generated a single JSONL file with 750 samples (75 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", + "Generated a single JSONL file with 688 
samples (75 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", + "Generated JSONL file with - 430 max words, 75 samples - at ./dataset/gen-word-430-count.jsonl\n", + "Generated JSONL file with - 510 max words, 75 samples - at ./dataset/gen-word-510-count.jsonl\n", + "Generated JSONL file with - 550 max words, 75 samples - at ./dataset/gen-word-550-count.jsonl\n", + "Generated a single JSONL file with 3293 samples (100 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", + "Generated JSONL file with - 580 max words, 75 samples - at ./dataset/gen-word-580-count.jsonl\n", + "Generated a single JSONL file with 524 samples (75 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", + "Generated a single JSONL file with 697 samples (75 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", + "Generated JSONL file with - 300 max words, 75 samples - at ./dataset/gen-word-300-count.jsonl\n", + "Generated JSONL file with - 710 max words, 75 samples - at ./dataset/gen-word-710-count.jsonl\n", + "Generated JSONL file with - 810 max words, 75 samples - at ./dataset/gen-word-810-count.jsonl\n", + "Generated a single JSONL file with 2937 samples (100 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", + "Generated a single JSONL file with 3532 samples (100 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", + "Generated a single JSONL file with 2671 samples (100 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", + "Generated a single JSONL file with 3130 samples (100 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", + "Generated JSONL file with - 700 max words, 75 samples - at ./dataset/gen-word-700-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", + "Generated JSONL file with - 730 max words, 75 samples - at ./dataset/gen-word-730-count.jsonl\n", + "Generated JSONL file with - 1030 max words, 75 samples - at ./dataset/gen-word-1030-count.jsonl\n", + "Generated JSONL file with - 570 max words, 75 samples - at ./dataset/gen-word-570-count.jsonl\n", + "Generated JSONL file with - 630 max words, 75 samples - at ./dataset/gen-word-630-count.jsonl\n", + "Generated JSONL file with - 870 max words, 75 samples - at ./dataset/gen-word-870-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", + "Generated JSONL file with - 280 max words, 75 samples - at ./dataset/gen-word-280-count.jsonl\n", + "Generated JSONL file with - 420 max words, 75 samples - at ./dataset/gen-word-420-count.jsonl\n", + "Generated a single JSONL file with 2794 samples (100 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n", + "Generated a single JSONL file with 1037 samples (75 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", + "Generated JSONL file with - 780 max words, 75 samples - at ./dataset/gen-word-780-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", + "Generated JSONL file with - 440 max words, 75 samples - at ./dataset/gen-word-440-count.jsonl\n", + "Generated a single JSONL file with 521 samples (75 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", + "Generated JSONL file 
with - 590 max words, 75 samples - at ./dataset/gen-word-590-count.jsonl\n", + "Generated JSONL file with - 760 max words, 75 samples - at ./dataset/gen-word-760-count.jsonl\n", + "Generated JSONL file with - 640 max words, 75 samples - at ./dataset/gen-word-640-count.jsonl\n", + "Generated a single JSONL file with 3770 samples (100 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", + "Generated JSONL file with - 480 max words, 75 samples - at ./dataset/gen-word-480-count.jsonl\n", + "Generated JSONL file with - 740 max words, 75 samples - at ./dataset/gen-word-740-count.jsonl\n", + "Generated JSONL file with - 770 max words, 75 samples - at ./dataset/gen-word-770-count.jsonl\n", + "Generated JSONL file with - 880 max words, 75 samples - at ./dataset/gen-word-880-count.jsonl\n", + "Generated a single JSONL file with 4805 samples (100 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", + "Generated a single JSONL file with 4074 samples (100 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonl\n", + "Generated a single JSONL file with 529 samples (75 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", + "Generated a single JSONL file with 5231 samples (100 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", + "Generated JSONL file with - 270 max words, 75 samples - at ./dataset/gen-word-270-count.jsonl\n", + "Generated JSONL file with - 610 max words, 75 samples - at ./dataset/gen-word-610-count.jsonl\n", + "Generated JSONL file with - 750 max words, 75 samples - at ./dataset/gen-word-750-count.jsonl\n", + "Generated JSONL file with - 860 max words, 75 samples - at ./dataset/gen-word-860-count.jsonl\n", + "Generated JSONL file with - 500 max words, 75 samples - at ./dataset/gen-word-500-count.jsonl\n", + "Generated a single JSONL file with 682 samples (75 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", + "Generated JSONL file with - 680 max words, 75 samples - at ./dataset/gen-word-680-count.jsonl\n", + "Generated a single JSONL file with 7562 samples (100 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", + "Generated JSONL file with - 540 max words, 75 samples - at ./dataset/gen-word-540-count.jsonl\n", + "Generated a single JSONL file with 754 samples (75 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", + "Generated JSONL file with - 400 max words, 75 samples - at ./dataset/gen-word-400-count.jsonl\n", + "Generated JSONL file with - 460 max words, 75 samples - at ./dataset/gen-word-460-count.jsonl\n", + "Generated JSONL file with - 520 max words, 75 samples - at ./dataset/gen-word-520-count.jsonl\n", + "Generated a single JSONL file with 434 samples (75 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", + "Generated a single JSONL file with 10604 samples (100 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n", + "Generated JSONL file with - 560 max words, 75 samples - at ./dataset/gen-word-560-count.jsonl\n", + "Generated a single JSONL file with 527 samples (75 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", + "Generated JSONL file with - 320 max words, 75 samples - at ./dataset/gen-word-320-count.jsonl\n", + "Generated JSONL file with - 690 max words, 75 samples - at 
./dataset/gen-word-690-count.jsonl\n", + "Generated a single JSONL file with 8750 samples (100 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", + "Generated JSONL file with - 830 max words, 75 samples - at ./dataset/gen-word-830-count.jsonl\n", + "Generated a single JSONL file with 6559 samples (100 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", + "Generated JSONL file with - 410 max words, 75 samples - at ./dataset/gen-word-410-count.jsonl\n", + "Generated JSONL file with - 910 max words, 75 samples - at ./dataset/gen-word-910-count.jsonl\n", + "Generated JSONL file with - 650 max words, 75 samples - at ./dataset/gen-word-650-count.jsonl\n", + "Generated JSONL file with - 790 max words, 75 samples - at ./dataset/gen-word-790-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 600 max words - at ./dataset/shuffle-word-600-count.jsonl\n", + "Generated JSONL file with - 1120 max words, 75 samples - at ./dataset/gen-word-1120-count.jsonl\n", + "Generated JSONL file with - 720 max words, 75 samples - at ./dataset/gen-word-720-count.jsonl\n", + "Generated JSONL file with - 820 max words, 75 samples - at ./dataset/gen-word-820-count.jsonl\n", + "Generated a single JSONL file with 540 samples (75 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", + "Generated a single JSONL file with 4388 samples (100 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", + "Generated JSONL file with - 930 max words, 75 samples - at ./dataset/gen-word-930-count.jsonl\n", + "Generated a single JSONL file with 1023 samples (75 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", + "Generated JSONL file with - 600 max words, 75 samples - at ./dataset/gen-word-600-count.jsonl\n", + "Generated JSONL file with - 620 max words, 75 samples - at ./dataset/gen-word-620-count.jsonl\n", + "Generated a single JSONL file with 440 samples (75 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", + "Generated JSONL file with - 920 max words, 75 samples - at ./dataset/gen-word-920-count.jsonl\n", + "Generated JSONL file with - 670 max words, 75 samples - at ./dataset/gen-word-670-count.jsonl\n", + "Generated JSONL file with - 970 max words, 75 samples - at ./dataset/gen-word-970-count.jsonl\n", + "Generated JSONL file with - 990 max words, 75 samples - at ./dataset/gen-word-990-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", + "Generated JSONL file with - 660 max words, 75 samples - at ./dataset/gen-word-660-count.jsonl\n", + "Generated JSONL file with - 800 max words, 75 samples - at ./dataset/gen-word-800-count.jsonl\n", + "Generated JSONL file with - 1070 max words, 75 samples - at ./dataset/gen-word-1070-count.jsonl\n", + "Generated JSONL file with - 980 max words, 75 samples - at ./dataset/gen-word-980-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", + "Generated JSONL file with - 960 max words, 75 samples - at ./dataset/gen-word-960-count.jsonl\n", + "Generated a single JSONL file with 
5882 samples (100 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", + "Generated a single JSONL file with 1013 samples (75 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", + "Generated JSONL file with - 1000 max words, 75 samples - at ./dataset/gen-word-1000-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", + "Generated JSONL file with - 900 max words, 75 samples - at ./dataset/gen-word-900-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 390 max words - at ./dataset/shuffle-word-390-count.jsonl\n", + "Generated a single JSONL file with 13011 samples (100 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonl\n", + "Generated JSONL file with - 890 max words, 75 samples - at ./dataset/gen-word-890-count.jsonl\n", + "Generated a single JSONL file with 370 samples (75 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", + "Generated JSONL file with - 1010 max words, 75 samples - at ./dataset/gen-word-1010-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", + "Generated a single JSONL file with 221 samples (75 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", + "Generated JSONL file with - 1100 max words, 75 samples - at ./dataset/gen-word-1100-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", + "Generated JSONL file with - 1150 max words, 75 samples - at ./dataset/gen-word-1150-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", + "Generated a single JSONL file with 449 samples (75 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", + "Generated JSONL file with - 840 max words, 75 samples - at ./dataset/gen-word-840-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", + "Generated a single JSONL file with 693 samples (75 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonlGenerated a single JSONL file with 443 samples (75 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", + "\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 800 max words - at 
./dataset/shuffle-word-800-count.jsonl\n", + "Generated a single JSONL file with 1097 samples (75 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", + "Generated a single JSONL file with 529 samples (75 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", + "Generated a single JSONL file with 435 samples (75 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", + "Generated JSONL file with - 940 max words, 75 samples - at ./dataset/gen-word-940-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", + "Generated a single JSONL file with 442 samples (75 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", + "Generated JSONL file with - 1050 max words, 75 samples - at ./dataset/gen-word-1050-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", + "Generated a single JSONL file with 17788 samples (100 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", + "Generated a single JSONL file with 234 samples (75 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", + "Generated a single JSONL file with 296 samples (75 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", + "Generated a single JSONL file with 242 samples (75 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", + "Generated JSONL file with - 1090 max words, 75 samples - at ./dataset/gen-word-1090-count.jsonl\n", + "Generated a single JSONL file with 239 samples (75 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", + "Generated JSONL file with - 
1110 max words, 75 samples - at ./dataset/gen-word-1110-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", + "Generated JSONL file with - 850 max words, 75 samples - at ./dataset/gen-word-850-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", + "Generated JSONL file with - 950 max words, 75 samples - at ./dataset/gen-word-950-count.jsonl\n", + "Generated a single JSONL file with 234 samples (75 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", + "Generated JSONL file with - 1080 max words, 75 samples - at ./dataset/gen-word-1080-count.jsonl\n", + "Generated a single JSONL file with 297 samples (75 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", + "Generated JSONL file with - 1140 max words, 75 samples - at ./dataset/gen-word-1140-count.jsonl\n", + "Generated JSONL file with - 1210 max words, 75 samples - at ./dataset/gen-word-1210-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", + "Generated a single JSONL file with 239 samples (75 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", + "Generated JSONL file with - 1160 max words, 75 samples - at ./dataset/gen-word-1160-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", + "Generated JSONL file with - 1190 max words, 75 samples - at ./dataset/gen-word-1190-count.jsonl\n", + "Generated JSONL file with - 1060 max words, 75 samples - at ./dataset/gen-word-1060-count.jsonl\n", + "Generated JSONL file with - 1290 max words, 75 samples - at ./dataset/gen-word-1290-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", + "Generated JSONL file with - 1300 max words, 75 samples - at ./dataset/gen-word-1300-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", + "Generated JSONL file with - 1280 max words, 75 samples - at ./dataset/gen-word-1280-count.jsonl\n", + "Generated JSONL file with - 1040 max words, 75 samples - at ./dataset/gen-word-1040-count.jsonl\n", + "Generated a single JSONL file with 446 samples (75 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", + "Generated a single JSONL file with 237 samples (75 token repeat) - 830 max words - at 
./dataset/shuffle-word-830-count.jsonl\n", + "Generated JSONL file with - 1200 max words, 75 samples - at ./dataset/gen-word-1200-count.jsonl\n", + "Generated JSONL file with - 1820 max words, 75 samples - at ./dataset/gen-word-1820-count.jsonl\n", + "Generated JSONL file with - 1170 max words, 75 samples - at ./dataset/gen-word-1170-count.jsonl\n", + "Generated JSONL file with - 1260 max words, 75 samples - at ./dataset/gen-word-1260-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n", + "Generated JSONL file with - 1330 max words, 75 samples - at ./dataset/gen-word-1330-count.jsonl\n", + "Generated JSONL file with - 1240 max words, 75 samples - at ./dataset/gen-word-1240-count.jsonl\n", + "Generated JSONL file with - 2900 max words, 75 samples - at ./dataset/gen-word-2900-count.jsonl\n", + "Generated JSONL file with - 1020 max words, 75 samples - at ./dataset/gen-word-1020-count.jsonl\n", + "Generated JSONL file with - 1180 max words, 75 samples - at ./dataset/gen-word-1180-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", + "Generated JSONL file with - 1130 max words, 75 samples - at ./dataset/gen-word-1130-count.jsonl\n", + "Generated JSONL file with - 1320 max words, 75 samples - at ./dataset/gen-word-1320-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", + "Generated JSONL file with - 1230 max words, 75 samples - at ./dataset/gen-word-1230-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", + "Generated JSONL file with - 1760 max words, 75 samples - at ./dataset/gen-word-1760-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", + "Generated JSONL file with - 1220 max words, 75 samples - at ./dataset/gen-word-1220-count.jsonl\n", + "Generated JSONL file with - 1270 max words, 75 samples - at ./dataset/gen-word-1270-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", + "Generated JSONL file with - 1250 max words, 75 samples - at ./dataset/gen-word-1250-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", + "Generated a single JSONL file with 26100 samples (100 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n", + "Generated a single JSONL file with 223 samples (75 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", + "Generated JSONL file with - 1360 max words, 75 samples - at ./dataset/gen-word-1360-count.jsonl\n", + "Generated JSONL file with - 1370 max words, 75 samples - at ./dataset/gen-word-1370-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 
token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", + "Generated a single JSONL file with 189 samples (75 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n", + "Generated JSONL file with - 1390 max words, 75 samples - at ./dataset/gen-word-1390-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", + "Generated JSONL file with - 1380 max words, 75 samples - at ./dataset/gen-word-1380-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", + "Generated a single JSONL file with 185 samples (75 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", + "Generated a single JSONL file with 194 samples (75 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", + "Generated a single JSONL file with 185 samples (75 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", + "Generated JSONL file with - 1340 max words, 75 samples - at ./dataset/gen-word-1340-count.jsonl\n", + "Generated a single JSONL file with 193 samples (75 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n", + "Generated JSONL file with - 1560 max words, 75 samples - at ./dataset/gen-word-1560-count.jsonl\n", + "Generated JSONL file with - 1350 max words, 75 samples - at ./dataset/gen-word-1350-count.jsonl\n", + "Generated JSONL file with - 1410 max words, 75 samples - at ./dataset/gen-word-1410-count.jsonl\n", + "Generated a single JSONL file with 187 samples (75 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", + "Generated JSONL file with - 1420 max words, 75 samples - at ./dataset/gen-word-1420-count.jsonl\n", + "Generated a single JSONL file with 188 samples (75 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", + "Generated a single JSONL file with 185 samples (75 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", + "Generated JSONL file with - 1710 max words, 75 samples - at ./dataset/gen-word-1710-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1190 max words - at 
./dataset/shuffle-word-1190-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n", + "Generated JSONL file with - 1440 max words, 75 samples - at ./dataset/gen-word-1440-count.jsonl\n", + "Generated JSONL file with - 1310 max words, 75 samples - at ./dataset/gen-word-1310-count.jsonl\n", + "Generated a single JSONL file with 189 samples (75 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", + "Generated a single JSONL file with 184 samples (75 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", + "Generated JSONL file with - 1430 max words, 75 samples - at ./dataset/gen-word-1430-count.jsonl\n", + "Generated JSONL file with - 1450 max words, 75 samples - at ./dataset/gen-word-1450-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", + "Generated JSONL file with - 1400 max words, 75 samples - at ./dataset/gen-word-1400-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", + "Generated JSONL file with - 1470 max words, 75 samples - at ./dataset/gen-word-1470-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", + "Generated JSONL file with - 1500 max words, 75 samples - at ./dataset/gen-word-1500-count.jsonl\n", + "Generated JSONL file with - 1850 max words, 75 samples - at ./dataset/gen-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", + "Generated JSONL file with - 1490 max words, 75 samples - at ./dataset/gen-word-1490-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", + "Generated JSONL file with - 1480 max words, 75 samples - at ./dataset/gen-word-1480-count.jsonl\n", + "Generated JSONL file with - 1580 max words, 75 samples - at ./dataset/gen-word-1580-count.jsonl\n", + "Generated JSONL file with - 1600 max words, 75 samples - at ./dataset/gen-word-1600-count.jsonl\n", + "Generated JSONL file with - 1550 max words, 75 samples - at ./dataset/gen-word-1550-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", + "Generated JSONL file with - 1530 max words, 75 samples - at ./dataset/gen-word-1530-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", + "Generated JSONL file with - 1510 max words, 75 samples - at ./dataset/gen-word-1510-count.jsonl\n", + "Generated JSONL file with - 1660 max words, 75 samples - at 
./dataset/gen-word-1660-count.jsonl\n", + "Generated JSONL file with - 1540 max words, 75 samples - at ./dataset/gen-word-1540-count.jsonl\n", + "Generated JSONL file with - 1520 max words, 75 samples - at ./dataset/gen-word-1520-count.jsonl\n", + "Generated JSONL file with - 1460 max words, 75 samples - at ./dataset/gen-word-1460-count.jsonl\n", + "Generated JSONL file with - 1570 max words, 75 samples - at ./dataset/gen-word-1570-count.jsonl\n", + "Generated JSONL file with - 1670 max words, 75 samples - at ./dataset/gen-word-1670-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", + "Generated JSONL file with - 1630 max words, 75 samples - at ./dataset/gen-word-1630-count.jsonl\n", + "Generated JSONL file with - 1650 max words, 75 samples - at ./dataset/gen-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n", + "Generated JSONL file with - 1680 max words, 75 samples - at ./dataset/gen-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", + "Generated JSONL file with - 1700 max words, 75 samples - at ./dataset/gen-word-1700-count.jsonl\n", + "Generated JSONL file with - 3225 max words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n", + "Generated JSONL file with - 1620 max words, 75 samples - at ./dataset/gen-word-1620-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", + "Generated JSONL file with - 1640 max words, 75 samples - at ./dataset/gen-word-1640-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n", + "Generated JSONL file with - 1720 max words, 75 samples - at ./dataset/gen-word-1720-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1550 max words - at ./dataset/shuffle-word-1550-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonl\n", + "Generated a single JSONL
file with 150 samples (75 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n", + "Generated JSONL file with - 1690 max words, 75 samples - at ./dataset/gen-word-1690-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1650 max words - at ./dataset/shuffle-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n", + "Generated JSONL file with - 1770 max words, 75 samples - at ./dataset/gen-word-1770-count.jsonl\n", + "Generated JSONL file with - 1830 max words, 75 samples - at ./dataset/gen-word-1830-count.jsonl\n", + "Generated JSONL file with - 1730 max words, 75 samples - at ./dataset/gen-word-1730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n", + "Generated JSONL file with - 1800 max words, 75 samples - at ./dataset/gen-word-1800-count.jsonl\n", + "Generated JSONL file with - 1810 max words, 75 samples - at ./dataset/gen-word-1810-count.jsonl\n", + "Generated JSONL file with - 1590 max words, 75 samples - at ./dataset/gen-word-1590-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at ./dataset/shuffle-word-3600-count.jsonl\n", + "Generated JSONL file with - 1840 max words, 75 samples - at ./dataset/gen-word-1840-count.jsonl\n", + "Generated JSONL file with - 1780 max words, 75 samples - at ./dataset/gen-word-1780-count.jsonl\n", + "Generated JSONL file with - 1610 max words, 75 samples - at ./dataset/gen-word-1610-count.jsonl\n", + "Generated JSONL file with - 1790 max words, 75 samples - at ./dataset/gen-word-1790-count.jsonl\n", + "Generated JSONL file with - 1750 max words, 75 samples - at ./dataset/gen-word-1750-count.jsonl\n", + "Generated JSONL file with - 1740 max words, 75 samples - at ./dataset/gen-word-1740-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n", + "Generated JSONL file with - 1900 max words, 75 samples - at ./dataset/gen-word-1900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n", + "Generated JSONL file with - 1860 max words, 75 samples - at ./dataset/gen-word-1860-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2000 max words - at ./dataset/shuffle-word-2000-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n", + "Generated JSONL file with - 2220 max words, 75 samples - at ./dataset/gen-word-2220-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 
token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n", + "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", + "Generated JSONL file with - 1920 max words, 75 samples - at ./dataset/gen-word-1920-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n", + "Generated JSONL file with - 1880 max words, 75 samples - at ./dataset/gen-word-1880-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n", + "Generated JSONL file with - 1910 max words, 75 samples - at ./dataset/gen-word-1910-count.jsonl\n", + "Generated JSONL file with - 1930 max words, 75 samples - at ./dataset/gen-word-1930-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n", + "Generated JSONL file with - 1870 max words, 75 samples - at ./dataset/gen-word-1870-count.jsonl\n", + "Generated JSONL file with - 1890 max words, 75 samples - at ./dataset/gen-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1940 max words - at ./dataset/shuffle-word-1940-count.jsonl\n", + "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", + "Generated JSONL file with - 1990 max words, 75 samples - at ./dataset/gen-word-1990-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5200 max words - at ./dataset/shuffle-word-5200-count.jsonl\n", + "Generated JSONL file with - 2000 max words, 75 samples - at ./dataset/gen-word-2000-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1910 max words - at ./dataset/shuffle-word-1910-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1930 max words - at ./dataset/shuffle-word-1930-count.jsonl\n", + "Generated a single JSONL file with 55753 samples (100 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1870 max words - at ./dataset/shuffle-word-1870-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n", + "Generated JSONL file with - 1970 max words, 75 samples - at ./dataset/gen-word-1970-count.jsonl\n", + "Generated JSONL file with - 2020 max words, 75 samples - at 
./dataset/gen-word-2020-count.jsonl\n", + "Generated JSONL file with - 2040 max words, 75 samples - at ./dataset/gen-word-2040-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonl\n", + "Generated JSONL file with - 2070 max words, 75 samples - at ./dataset/gen-word-2070-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n", + "Generated JSONL file with - 2030 max words, 75 samples - at ./dataset/gen-word-2030-count.jsonl\n", + "Generated JSONL file with - 2090 max words, 75 samples - at ./dataset/gen-word-2090-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n", + "Generated JSONL file with - 1950 max words, 75 samples - at ./dataset/gen-word-1950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n", + "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", + "Generated JSONL file with - 2150 max words, 75 samples - at ./dataset/gen-word-2150-count.jsonl\n", + "Generated JSONL file with - 2390 max words, 75 samples - at ./dataset/gen-word-2390-count.jsonl\n", + "Generated JSONL file with - 2120 max words, 75 samples - at ./dataset/gen-word-2120-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n", + "Generated a single JSONL file with 142 samples (75 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2100 max words - at ./dataset/shuffle-word-2100-count.jsonl\n", + "Generated JSONL file with - 2140 max words, 75 samples - at ./dataset/gen-word-2140-count.jsonl\n", + "Generated a single JSONL file with 84 samples (75 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n", + "Generated a single JSONL file with 147 samples (75 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n", + "Generated JSONL file with - 2200 max words, 75 samples - at ./dataset/gen-word-2200-count.jsonl\n", + "Generated JSONL file with - 2270 max words, 75 samples - at ./dataset/gen-word-2270-count.jsonl\n", + "Generated JSONL file with - 4400 max words, 100 samples - at ./dataset/gen-word-4400-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n", + "Generated a single JSONL file 
with 110 samples (75 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n", + "Generated JSONL file with - 2190 max words, 75 samples - at ./dataset/gen-word-2190-count.jsonl\n", + "Generated JSONL file with - 2480 max words, 75 samples - at ./dataset/gen-word-2480-count.jsonl\n", + "Generated JSONL file with - 2580 max words, 75 samples - at ./dataset/gen-word-2580-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", + "Generated JSONL file with - 2280 max words, 75 samples - at ./dataset/gen-word-2280-count.jsonl\n", + "Generated JSONL file with - 2100 max words, 75 samples - at ./dataset/gen-word-2100-count.jsonl\n", + "Generated JSONL file with - 1980 max words, 75 samples - at ./dataset/gen-word-1980-count.jsonl\n", + "Generated JSONL file with - 2050 max words, 75 samples - at ./dataset/gen-word-2050-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n", + "Generated JSONL file with - 2080 max words, 75 samples - at ./dataset/gen-word-2080-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n", + "Generated JSONL file with - 2010 max words, 75 samples - at ./dataset/gen-word-2010-count.jsonl\n", + "Generated JSONL file with - 2110 max words, 75 samples - at ./dataset/gen-word-2110-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2140 max words - at ./dataset/shuffle-word-2140-count.jsonl\n", + "Generated JSONL file with - 4150 max words, 100 samples - at ./dataset/gen-word-4150-count.jsonl\n", + "Generated JSONL file with - 1960 max words, 75 samples - at ./dataset/gen-word-1960-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2090 max words - at ./dataset/shuffle-word-2090-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n", + "Generated JSONL file with - 2130 max words, 75 samples - at ./dataset/gen-word-2130-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n", + "Generated JSONL file with - 2170 max words, 75 samples - at ./dataset/gen-word-2170-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n", + "Generated JSONL file with - 4275 max words, 100 samples - at ./dataset/gen-word-4275-count.jsonl\n", + "Generated JSONL file with - 2160 max words, 75 samples - at ./dataset/gen-word-2160-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token
repeat) - 4575 max words - at ./dataset/shuffle-word-4575-count.jsonl\n", + "Generated JSONL file with - 4500 max words, 100 samples - at ./dataset/gen-word-4500-count.jsonl\n", + "Generated JSONL file with - 2180 max words, 75 samples - at ./dataset/gen-word-2180-count.jsonl\n", + "Generated JSONL file with - 2410 max words, 75 samples - at ./dataset/gen-word-2410-count.jsonl\n", + "Generated JSONL file with - 2400 max words, 75 samples - at ./dataset/gen-word-2400-count.jsonl\n", + "Generated JSONL file with - 2560 max words, 75 samples - at ./dataset/gen-word-2560-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n", + "Generated JSONL file with - 4750 max words, 100 samples - at ./dataset/gen-word-4750-count.jsonl\n", + "Generated JSONL file with - 2570 max words, 75 samples - at ./dataset/gen-word-2570-count.jsonl\n", + "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n", + "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n", + "Generated JSONL file with - 2250 max words, 75 samples - at ./dataset/gen-word-2250-count.jsonl\n", + "Generated JSONL file with - 1940 max words, 75 samples - at ./dataset/gen-word-1940-count.jsonl\n", + "Generated JSONL file with - 2260 max words, 75 samples - at ./dataset/gen-word-2260-count.jsonl\n", + "Generated JSONL file with - 2060 max words, 75 samples - at ./dataset/gen-word-2060-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n", + "Generated JSONL file with - 2230 max words, 75 samples - at ./dataset/gen-word-2230-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n", + "Generated JSONL file with - 6000 max words, 100 samples - at ./dataset/gen-word-6000-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n", + "Generated JSONL file with - 2660 max words, 75 samples - at ./dataset/gen-word-2660-count.jsonl\n", + "Generated JSONL file with - 2760 max words, 75 samples - at ./dataset/gen-word-2760-count.jsonl\n", + "Generated JSONL file with - 2720 max words, 75 samples - at ./dataset/gen-word-2720-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n", + "Generated a single JSONL file with 86 samples (75 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n", + "Generated a 
single JSONL file with 150 samples (75 token repeat) - 2280 max words - at ./dataset/shuffle-word-2280-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", + "Generated JSONL file with - 2330 max words, 75 samples - at ./dataset/gen-word-2330-count.jsonl\n", + "Generated JSONL file with - 2350 max words, 75 samples - at ./dataset/gen-word-2350-count.jsonl\n", + "Generated JSONL file with - 2310 max words, 75 samples - at ./dataset/gen-word-2310-count.jsonl\n", + "Generated JSONL file with - 2370 max words, 75 samples - at ./dataset/gen-word-2370-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n", + "Generated JSONL file with - 2380 max words, 75 samples - at ./dataset/gen-word-2380-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n", + "Generated JSONL file with - 2320 max words, 75 samples - at ./dataset/gen-word-2320-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3125 max words - at ./dataset/shuffle-word-3125-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonl\n", + "Generated JSONL file with - 2210 max words, 75 samples - at ./dataset/gen-word-2210-count.jsonl\n", + "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n", + "Generated a single JSONL file with 142 samples (75 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n", + "Generated a single JSONL file with 141 samples (75 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2400 max words - at ./dataset/shuffle-word-2400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2330 max words - at ./dataset/shuffle-word-2330-count.jsonl\n", + "Generated JSONL file with - 2240 max words, 75 samples - at ./dataset/gen-word-2240-count.jsonl\n", + "Generated JSONL file with - 2450 max words, 75 samples - at ./dataset/gen-word-2450-count.jsonl\n", + "Generated JSONL file with - 2290 max words, 75 samples - at ./dataset/gen-word-2290-count.jsonl\n", + "Generated JSONL file with - 2430 max words, 75 samples - at ./dataset/gen-word-2430-count.jsonl\n", + "Generated JSONL file with - 4175 max words, 100 samples - at ./dataset/gen-word-4175-count.jsonl\n", + "Generated JSONL file with - 3575 max words, 100 samples - at ./dataset/gen-word-3575-count.jsonl\n", + "Generated JSONL file with - 2510 max words, 75 samples - at ./dataset/gen-word-2510-count.jsonl\n", + 
"Generated JSONL file with - 2470 max words, 75 samples - at ./dataset/gen-word-2470-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n", + "Generated a single JSONL file with 134 samples (75 token repeat) - 2450 max words - at ./dataset/shuffle-word-2450-count.jsonl\n", + "Generated a single JSONL file with 117 samples (75 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n", + "Generated a single JSONL file with 140 samples (75 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n", + "Generated JSONL file with - 2420 max words, 75 samples - at ./dataset/gen-word-2420-count.jsonl\n", + "Generated JSONL file with - 2440 max words, 75 samples - at ./dataset/gen-word-2440-count.jsonl\n", + "Generated a single JSONL file with 141 samples (75 token repeat) - 2500 max words - at ./dataset/shuffle-word-2500-count.jsonl\n", + "Generated a single JSONL file with 136 samples (75 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n", + "Generated JSONL file with - 2360 max words, 75 samples - at ./dataset/gen-word-2360-count.jsonl\n", + "Generated a single JSONL file with 140 samples (75 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n", + "Generated a single JSONL file with 112 samples (75 token repeat) - 2520 max words - at ./dataset/shuffle-word-2520-count.jsonl\n", + "Generated JSONL file with - 2460 max words, 75 samples - at ./dataset/gen-word-2460-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonl\n", + "Generated a single JSONL file with 132 samples (75 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n", + "Generated JSONL file with - 2340 max words, 75 samples - at ./dataset/gen-word-2340-count.jsonl\n", + "Generated JSONL file with - 2300 max words, 75 samples - at ./dataset/gen-word-2300-count.jsonl\n", + "Generated a single JSONL file with 116 samples (75 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n", + "Generated a single JSONL file with 117 samples (75 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n", + "Generated JSONL file with - 2530 max words, 75 samples - at ./dataset/gen-word-2530-count.jsonl\n", + "Generated JSONL file with - 2520 max words, 75 samples - at ./dataset/gen-word-2520-count.jsonl\n", + "Generated JSONL file with - 2550 max words, 75 samples - at ./dataset/gen-word-2550-count.jsonl\n", + "Generated JSONL file with - 2600 max words, 75 samples - at ./dataset/gen-word-2600-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n", + "Generated a single JSONL file with 111 samples (75 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n", + "Generated JSONL file with - 2610 max words, 75 samples - at ./dataset/gen-word-2610-count.jsonl\n", + "Generated JSONL file with - 2540 max words, 75 samples - at ./dataset/gen-word-2540-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n", + "Generated JSONL file with - 2640 max words, 75 samples - at ./dataset/gen-word-2640-count.jsonl\n", + "Generated JSONL file with - 2590 max words, 75 samples - at ./dataset/gen-word-2590-count.jsonl\n", + 
"Generated JSONL file with - 2650 max words, 75 samples - at ./dataset/gen-word-2650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4075 max words - at ./dataset/shuffle-word-4075-count.jsonl\n", + "Generated JSONL file with - 2710 max words, 75 samples - at ./dataset/gen-word-2710-count.jsonl\n", + "Generated JSONL file with - 2670 max words, 75 samples - at ./dataset/gen-word-2670-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4550 max words - at ./dataset/shuffle-word-4550-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n", + "Generated JSONL file with - 2690 max words, 75 samples - at ./dataset/gen-word-2690-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n", + "Generated a single JSONL file with 87 samples (75 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n", + "Generated a single JSONL file with 86 samples (75 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n", + "Generated JSONL file with - 2620 max words, 75 samples - at ./dataset/gen-word-2620-count.jsonl\n", + "Generated a single JSONL file with 88 samples (75 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n", + "Generated JSONL file with - 2630 max words, 75 samples - at ./dataset/gen-word-2630-count.jsonl\n", + "Generated a single JSONL file with 113 samples (75 token repeat) - 2600 max words - at ./dataset/shuffle-word-2600-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n", + "Generated JSONL file with - 2740 max words, 75 samples - at ./dataset/gen-word-2740-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5350 max words - at ./dataset/shuffle-word-5350-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n", + "Generated JSONL file with - 2680 max words, 75 samples - at ./dataset/gen-word-2680-count.jsonl\n", + "Generated a single JSONL file with 111 samples (75 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n", + "Generated a single JSONL file with 114 samples (75 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n", + "Generated JSONL file with - 2750 max words, 75 samples - at ./dataset/gen-word-2750-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n", + "Generated JSONL file with - 2730 max words, 75 samples - at ./dataset/gen-word-2730-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2770 max words - at 
./dataset/shuffle-word-2770-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5625 max words - at ./dataset/shuffle-word-5625-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n", + "Generated JSONL file with - 2780 max words, 75 samples - at ./dataset/gen-word-2780-count.jsonl\n", + "Generated JSONL file with - 4250 max words, 100 samples - at ./dataset/gen-word-4250-count.jsonl\n", + "Generated JSONL file with - 2790 max words, 75 samples - at ./dataset/gen-word-2790-count.jsonl\n", + "Generated JSONL file with - 4075 max words, 100 samples - at ./dataset/gen-word-4075-count.jsonl\n", + "Generated JSONL file with - 4025 max words, 100 samples - at ./dataset/gen-word-4025-count.jsonl\n", + "Generated JSONL file with - 2800 max words, 75 samples - at ./dataset/gen-word-2800-count.jsonl\n", + "Generated JSONL file with - 4225 max words, 100 samples - at ./dataset/gen-word-4225-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n", + "Generated JSONL file with - 4375 max words, 100 samples - at ./dataset/gen-word-4375-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n", + "Generated JSONL file with - 2830 max words, 75 samples - at ./dataset/gen-word-2830-count.jsonl\n", + "Generated JSONL file with - 2810 max words, 75 samples - at ./dataset/gen-word-2810-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n", + "Generated JSONL file with - 4300 max words, 100 samples - at ./dataset/gen-word-4300-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", + "Generated JSONL file with - 2840 max words, 75 samples - at ./dataset/gen-word-2840-count.jsonl\n", + "Generated JSONL file with - 2500 max words, 75 samples - at ./dataset/gen-word-2500-count.jsonl\n", + "Generated JSONL file with - 4475 max words, 100 samples - at ./dataset/gen-word-4475-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n", + "Generated JSONL file with - 2850 max words, 75 samples - at ./dataset/gen-word-2850-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n", + "Generated JSONL file with - 2490 max words, 75 samples - at ./dataset/gen-word-2490-count.jsonl\n", + "Generated JSONL file with - 2820 max words, 75 samples - at ./dataset/gen-word-2820-count.jsonl\n", + "Generated JSONL file with - 2860 max words, 75 samples - at ./dataset/gen-word-2860-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n", + "Generated JSONL file with - 2770 max words, 75 samples - at ./dataset/gen-word-2770-count.jsonl\n", + "Generated a single JSONL file with 116 samples (75 token repeat) - 2560 max words - at ./dataset/shuffle-word-2560-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n", + "Generated JSONL file with - 3200 max words, 100 samples - at 
./dataset/gen-word-3200-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2890 max words - at ./dataset/shuffle-word-2890-count.jsonl\n", + "Generated JSONL file with - 4675 max words, 100 samples - at ./dataset/gen-word-4675-count.jsonl\n", + "Generated JSONL file with - 2700 max words, 75 samples - at ./dataset/gen-word-2700-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", + "Generated JSONL file with - 4825 max words, 100 samples - at ./dataset/gen-word-4825-count.jsonl\n", + "Generated JSONL file with - 2920 max words, 75 samples - at ./dataset/gen-word-2920-count.jsonl\n", + "Generated JSONL file with - 2870 max words, 75 samples - at ./dataset/gen-word-2870-count.jsonl\n", + "Generated JSONL file with - 2940 max words, 75 samples - at ./dataset/gen-word-2940-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n", + "Generated JSONL file with - 2880 max words, 75 samples - at ./dataset/gen-word-2880-count.jsonl\n", + "Generated JSONL file with - 2890 max words, 75 samples - at ./dataset/gen-word-2890-count.jsonl\n", + "Generated JSONL file with - 2950 max words, 75 samples - at ./dataset/gen-word-2950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n", + "Generated JSONL file with - 3000 max words, 75 samples - at ./dataset/gen-word-3000-count.jsonl\n", + "Generated JSONL file with - 3125 max words, 100 samples - at ./dataset/gen-word-3125-count.jsonl\n", + "Generated JSONL file with - 2990 max words, 75 samples - at ./dataset/gen-word-2990-count.jsonl\n", + "Generated JSONL file with - 2910 max words, 75 samples - at ./dataset/gen-word-2910-count.jsonl\n", + "Generated JSONL file with - 2970 max words, 75 samples - at ./dataset/gen-word-2970-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n", + "Generated JSONL file with - 2980 max words, 75 samples - at ./dataset/gen-word-2980-count.jsonl\n", + "Generated JSONL file with - 3075 max words, 100 samples - at 
./dataset/gen-word-3075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4100 max words - at ./dataset/shuffle-word-4100-count.jsonl\n", + "Generated JSONL file with - 2960 max words, 75 samples - at ./dataset/gen-word-2960-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", + "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n", + "Generated JSONL file with - 2930 max words, 75 samples - at ./dataset/gen-word-2930-count.jsonl\n", + "Generated JSONL file with - 5475 max words, 100 samples - at ./dataset/gen-word-5475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", + "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", + "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n", + "Generated JSONL file with - 4200 max words, 100 samples - at ./dataset/gen-word-4200-count.jsonl\n", + "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", + "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 max words - at ./dataset/shuffle-word-3350-count.jsonl\n", + "Generated JSONL file with - 3100 max words, 100 samples - at ./dataset/gen-word-3100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n", + "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max words - at ./dataset/shuffle-word-3475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words - at ./dataset/shuffle-word-3450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at ./dataset/shuffle-word-3650-count.jsonl\n", + "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n", + "Generated JSONL file with - 3475 max words, 100 samples - at ./dataset/gen-word-3475-count.jsonl\n", +
"Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3625 max words - at ./dataset/shuffle-word-3625-count.jsonl\n", + "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at ./dataset/shuffle-word-3675-count.jsonl\n", + "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4225 max words - at ./dataset/shuffle-word-4225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4050 max words - at ./dataset/shuffle-word-4050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n", + "Generated JSONL file with - 4100 max words, 100 samples - at ./dataset/gen-word-4100-count.jsonl\n", + "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n", + "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4300 max words - at ./dataset/shuffle-word-4300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4200 max words - at ./dataset/shuffle-word-4200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3800 max words - at ./dataset/shuffle-word-3800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", + "Generated JSONL file with - 3650 max words, 100 samples - at ./dataset/gen-word-3650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4125 max words - at ./dataset/shuffle-word-4125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4025 max words - at ./dataset/shuffle-word-4025-count.jsonl\n", + "Generated JSONL file with - 3600 max words, 100 samples - at ./dataset/gen-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4400 max words - at ./dataset/shuffle-word-4400-count.jsonl\n", + "Generated JSONL file with - 4125 max words, 100 samples - at ./dataset/gen-word-4125-count.jsonl\n", + "Generated JSONL file with - 3675 max words, 100 
samples - at ./dataset/gen-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", + "Generated JSONL file with - 3875 max words, 100 samples - at ./dataset/gen-word-3875-count.jsonl\n", + "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonl\n", + "Generated JSONL file with - 3775 max words, 100 samples - at ./dataset/gen-word-3775-count.jsonl\n", + "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n", + "Generated JSONL file with - 3725 max words, 100 samples - at ./dataset/gen-word-3725-count.jsonl\n", + "Generated JSONL file with - 4350 max words, 100 samples - at ./dataset/gen-word-4350-count.jsonl\n", + "Generated JSONL file with - 4050 max words, 100 samples - at ./dataset/gen-word-4050-count.jsonl\n", + "Generated JSONL file with - 3900 max words, 100 samples - at ./dataset/gen-word-3900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4325 max words - at ./dataset/shuffle-word-4325-count.jsonl\n", + "Generated JSONL file with - 3850 max words, 100 samples - at ./dataset/gen-word-3850-count.jsonl\n", + "Generated JSONL file with - 3925 max words, 100 samples - at ./dataset/gen-word-3925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4375 max words - at ./dataset/shuffle-word-4375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4925 max words - at ./dataset/shuffle-word-4925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4175 max words - at ./dataset/shuffle-word-4175-count.jsonl\n", + "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4825 max words - at ./dataset/shuffle-word-4825-count.jsonl\n", + "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3850 max words - at ./dataset/shuffle-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5125 max words - at ./dataset/shuffle-word-5125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4350 max words - at ./dataset/shuffle-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5175 max words - at ./dataset/shuffle-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4700 max words - at ./dataset/shuffle-word-4700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4450 max words - at ./dataset/shuffle-word-4450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4250 max words - at ./dataset/shuffle-word-4250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4425 max words - at ./dataset/shuffle-word-4425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4150 max words - at ./dataset/shuffle-word-4150-count.jsonl\n", + "Generated JSONL file with - 3975 max words, 100 samples - at 
./dataset/gen-word-3975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4275 max words - at ./dataset/shuffle-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5425 max words - at ./dataset/shuffle-word-5425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4850 max words - at ./dataset/shuffle-word-4850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4975 max words - at ./dataset/shuffle-word-4975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5525 max words - at ./dataset/shuffle-word-5525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4475 max words - at ./dataset/shuffle-word-4475-count.jsonl\n", + "Generated JSONL file with - 5350 max words, 100 samples - at ./dataset/gen-word-5350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4500 max words - at ./dataset/shuffle-word-4500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4525 max words - at ./dataset/shuffle-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4675 max words - at ./dataset/shuffle-word-4675-count.jsonl\n", + "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4625 max words - at ./dataset/shuffle-word-4625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4725 max words - at ./dataset/shuffle-word-4725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5925 max words - at ./dataset/shuffle-word-5925-count.jsonl\n", + "Generated JSONL file with - 4325 max words, 100 samples - at ./dataset/gen-word-4325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4600 max words - at ./dataset/shuffle-word-4600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4775 max words - at ./dataset/shuffle-word-4775-count.jsonl\n", + "Generated JSONL file with - 4600 max words, 100 samples - at ./dataset/gen-word-4600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4650 max words - at ./dataset/shuffle-word-4650-count.jsonl\n", + "Generated JSONL file with - 4450 max words, 100 samples - at ./dataset/gen-word-4450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5075 max words - at ./dataset/shuffle-word-5075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4750 max words - at ./dataset/shuffle-word-4750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4900 max words - at ./dataset/shuffle-word-4900-count.jsonl\n", + "Generated JSONL file with - 4550 max words, 100 samples - at ./dataset/gen-word-4550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5000 max words - at ./dataset/shuffle-word-5000-count.jsonl\n", + "Generated JSONL file with - 4575 max words, 100 samples - at ./dataset/gen-word-4575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4875 max words 
- at ./dataset/shuffle-word-4875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4950 max words - at ./dataset/shuffle-word-4950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4800 max words - at ./dataset/shuffle-word-4800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5150 max words - at ./dataset/shuffle-word-5150-count.jsonl\n", + "Generated JSONL file with - 4650 max words, 100 samples - at ./dataset/gen-word-4650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5050 max words - at ./dataset/shuffle-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5225 max words - at ./dataset/shuffle-word-5225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5250 max words - at ./dataset/shuffle-word-5250-count.jsonl\n", + "Generated JSONL file with - 5325 max words, 100 samples - at ./dataset/gen-word-5325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5300 max words - at ./dataset/shuffle-word-5300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5550 max words - at ./dataset/shuffle-word-5550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5275 max words - at ./dataset/shuffle-word-5275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5400 max words - at ./dataset/shuffle-word-5400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5325 max words - at ./dataset/shuffle-word-5325-count.jsonl\n", + "Generated JSONL file with - 4425 max words, 100 samples - at ./dataset/gen-word-4425-count.jsonl\n", + "Generated JSONL file with - 4775 max words, 100 samples - at ./dataset/gen-word-4775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5375 max words - at ./dataset/shuffle-word-5375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5475 max words - at ./dataset/shuffle-word-5475-count.jsonl\n", + "Generated JSONL file with - 5425 max words, 100 samples - at ./dataset/gen-word-5425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5575 max words - at ./dataset/shuffle-word-5575-count.jsonl\n", + "Generated JSONL file with - 4800 max words, 100 samples - at ./dataset/gen-word-4800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5825 max words - at ./dataset/shuffle-word-5825-count.jsonl\n", + "Generated JSONL file with - 4525 max words, 100 samples - at ./dataset/gen-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5850 max words - at ./dataset/shuffle-word-5850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5750 max words - at ./dataset/shuffle-word-5750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5700 max words - at ./dataset/shuffle-word-5700-count.jsonl\n", + "Generated JSONL file with - 4625 max words, 100 samples - at ./dataset/gen-word-4625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5450 max words - at ./dataset/shuffle-word-5450-count.jsonl\n", + "Generated JSONL file with - 4850 max words, 100 samples - at ./dataset/gen-word-4850-count.jsonl\n", +
"Generated JSONL file with - 4875 max words, 100 samples - at ./dataset/gen-word-4875-count.jsonl\n", + "Generated JSONL file with - 4700 max words, 100 samples - at ./dataset/gen-word-4700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5650 max words - at ./dataset/shuffle-word-5650-count.jsonl\n", + "Generated JSONL file with - 4900 max words, 100 samples - at ./dataset/gen-word-4900-count.jsonl\n", + "Generated JSONL file with - 4725 max words, 100 samples - at ./dataset/gen-word-4725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5725 max words - at ./dataset/shuffle-word-5725-count.jsonl\n", + "Generated JSONL file with - 4975 max words, 100 samples - at ./dataset/gen-word-4975-count.jsonl\n", + "Generated JSONL file with - 4925 max words, 100 samples - at ./dataset/gen-word-4925-count.jsonl\n", + "Generated JSONL file with - 5250 max words, 100 samples - at ./dataset/gen-word-5250-count.jsonl\n", + "Generated JSONL file with - 5275 max words, 100 samples - at ./dataset/gen-word-5275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5775 max words - at ./dataset/shuffle-word-5775-count.jsonl\n", + "Generated JSONL file with - 5900 max words, 100 samples - at ./dataset/gen-word-5900-count.jsonl\n", + "Generated JSONL file with - 4950 max words, 100 samples - at ./dataset/gen-word-4950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5875 max words - at ./dataset/shuffle-word-5875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5900 max words - at ./dataset/shuffle-word-5900-count.jsonl\n", + "Generated JSONL file with - 5650 max words, 100 samples - at ./dataset/gen-word-5650-count.jsonl\n", + "Generated JSONL file with - 5000 max words, 100 samples - at ./dataset/gen-word-5000-count.jsonl\n", + "Generated JSONL file with - 5025 max words, 100 samples - at ./dataset/gen-word-5025-count.jsonl\n", + "Generated JSONL file with - 5150 max words, 100 samples - at ./dataset/gen-word-5150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5600 max words - at ./dataset/shuffle-word-5600-count.jsonl\n", + "Generated JSONL file with - 5225 max words, 100 samples - at ./dataset/gen-word-5225-count.jsonl\n", + "Generated JSONL file with - 5200 max words, 100 samples - at ./dataset/gen-word-5200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5800 max words - at ./dataset/shuffle-word-5800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5675 max words - at ./dataset/shuffle-word-5675-count.jsonl\n", + "Generated JSONL file with - 5100 max words, 100 samples - at ./dataset/gen-word-5100-count.jsonl\n", + "Generated JSONL file with - 5125 max words, 100 samples - at ./dataset/gen-word-5125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5500 max words - at ./dataset/shuffle-word-5500-count.jsonl\n", + "Generated JSONL file with - 5175 max words, 100 samples - at ./dataset/gen-word-5175-count.jsonl\n", + "Generated JSONL file with - 5050 max words, 100 samples - at ./dataset/gen-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5100 max words - at ./dataset/shuffle-word-5100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5025 max words - at 
./dataset/shuffle-word-5025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 6000 max words - at ./dataset/shuffle-word-6000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5975 max words - at ./dataset/shuffle-word-5975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5950 max words - at ./dataset/shuffle-word-5950-count.jsonl\n", + "Generated JSONL file with - 5300 max words, 100 samples - at ./dataset/gen-word-5300-count.jsonl\n", + "Generated JSONL file with - 5450 max words, 100 samples - at ./dataset/gen-word-5450-count.jsonl\n", + "Generated JSONL file with - 5400 max words, 100 samples - at ./dataset/gen-word-5400-count.jsonl\n", + "Generated JSONL file with - 5500 max words, 100 samples - at ./dataset/gen-word-5500-count.jsonl\n", + "Generated JSONL file with - 5525 max words, 100 samples - at ./dataset/gen-word-5525-count.jsonl\n", + "Generated JSONL file with - 5775 max words, 100 samples - at ./dataset/gen-word-5775-count.jsonl\n", + "Generated JSONL file with - 5825 max words, 100 samples - at ./dataset/gen-word-5825-count.jsonl\n", + "Generated JSONL file with - 5550 max words, 100 samples - at ./dataset/gen-word-5550-count.jsonl\n", + "Generated JSONL file with - 5850 max words, 100 samples - at ./dataset/gen-word-5850-count.jsonl\n", + "Generated JSONL file with - 5375 max words, 100 samples - at ./dataset/gen-word-5375-count.jsonl\n", + "Generated JSONL file with - 5750 max words, 100 samples - at ./dataset/gen-word-5750-count.jsonl\n", + "Generated JSONL file with - 5725 max words, 100 samples - at ./dataset/gen-word-5725-count.jsonl\n", + "Generated JSONL file with - 5800 max words, 100 samples - at ./dataset/gen-word-5800-count.jsonl\n", + "Generated JSONL file with - 5075 max words, 100 samples - at ./dataset/gen-word-5075-count.jsonl\n", + "Generated JSONL file with - 5625 max words, 100 samples - at ./dataset/gen-word-5625-count.jsonl\n", + "Generated JSONL file with - 5700 max words, 100 samples - at ./dataset/gen-word-5700-count.jsonl\n", + "Generated JSONL file with - 5875 max words, 100 samples - at ./dataset/gen-word-5875-count.jsonl\n", + "Generated JSONL file with - 5675 max words, 100 samples - at ./dataset/gen-word-5675-count.jsonl\n", + "Generated JSONL file with - 5975 max words, 100 samples - at ./dataset/gen-word-5975-count.jsonl\n", + "Generated JSONL file with - 5600 max words, 100 samples - at ./dataset/gen-word-5600-count.jsonl\n", + "Generated JSONL file with - 5575 max words, 100 samples - at ./dataset/gen-word-5575-count.jsonl\n", + "Generated JSONL file with - 5925 max words, 100 samples - at ./dataset/gen-word-5925-count.jsonl\n", + "Generated JSONL file with - 5950 max words, 100 samples - at ./dataset/gen-word-5950-count.jsonl\n", + "## Done ##\n", + "total 1.8G\n", + "drwxr-xr-x 2 root root 40K Jan 23 22:18 .\n", + "drwxr-xr-x 5 root root 4.0K Jan 23 22:16 ..\n", + "-rw-r--r-- 1 root root 20K Jan 23 22:18 gen-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 106K Jan 23 22:18 gen-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 736K Jan 23 22:18 gen-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 738K Jan 23 22:18 gen-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 752K Jan 23 22:18 gen-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 750K Jan 23 22:18 gen-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 759K Jan 23 22:18 gen-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 778K Jan 23 22:18 
gen-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 779K Jan 23 22:18 gen-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 786K Jan 23 22:18 gen-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 790K Jan 23 22:18 gen-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 798K Jan 23 22:18 gen-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 86K Jan 23 22:18 gen-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 805K Jan 23 22:18 gen-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 814K Jan 23 22:18 gen-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 822K Jan 23 22:18 gen-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 841K Jan 23 22:18 gen-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 838K Jan 23 22:18 gen-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 841K Jan 23 22:18 gen-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 848K Jan 23 22:18 gen-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 859K Jan 23 22:18 gen-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 865K Jan 23 22:18 gen-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 872K Jan 23 22:18 gen-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 95K Jan 23 22:18 gen-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 879K Jan 23 22:18 gen-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 893K Jan 23 22:18 gen-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 904K Jan 23 22:18 gen-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 893K Jan 23 22:18 gen-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 912K Jan 23 22:18 gen-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 918K Jan 23 22:18 gen-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 935K Jan 23 22:18 gen-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 938K Jan 23 22:18 gen-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 946K Jan 23 22:18 gen-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 952K Jan 23 22:18 gen-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 103K Jan 23 22:18 gen-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 956K Jan 23 22:18 gen-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 965K Jan 23 22:18 gen-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 968K Jan 23 22:18 gen-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 964K Jan 23 22:18 gen-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 980K Jan 23 22:18 gen-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 990K Jan 23 22:18 gen-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 990K Jan 23 22:18 gen-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 1005K Jan 23 22:18 gen-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1017K Jan 23 22:18 gen-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1017K Jan 23 22:18 gen-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 110K Jan 23 22:18 gen-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 
gen-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 25K Jan 23 22:18 gen-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 115K Jan 23 22:18 gen-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 125K Jan 23 22:18 gen-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 133K Jan 23 22:18 gen-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 145K Jan 23 22:18 gen-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 143K Jan 23 22:18 gen-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 
gen-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 30K Jan 23 22:18 gen-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 153K Jan 23 22:18 gen-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 159K Jan 23 22:18 gen-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 170K Jan 23 22:18 gen-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 180K Jan 23 22:18 gen-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 
gen-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 184K Jan 23 22:18 gen-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 34K Jan 23 22:18 gen-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 191K Jan 23 22:18 gen-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 199K Jan 23 22:18 gen-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 206K Jan 23 22:18 gen-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 
gen-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 209K Jan 23 22:18 gen-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 220K Jan 23 22:18 gen-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 39K Jan 23 22:18 gen-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 221K Jan 23 22:18 gen-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 gen-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 gen-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 gen-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 236K Jan 23 22:18 gen-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 gen-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 gen-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 240K Jan 23 22:18 gen-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 252K Jan 23 22:18 gen-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 22:18 gen-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 22:18 gen-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 gen-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 gen-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 258K Jan 23 22:18 gen-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 gen-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 
gen-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 gen-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 43K Jan 23 22:18 gen-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 267K Jan 23 22:18 gen-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 269K Jan 23 22:18 gen-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 22:18 gen-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 22:18 gen-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 22:18 gen-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 22:18 gen-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 277K Jan 23 22:18 gen-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 22:18 gen-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 22:18 gen-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 22:18 gen-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 285K Jan 23 22:18 gen-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 296K Jan 23 22:18 gen-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 gen-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 gen-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 gen-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 gen-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 49K Jan 23 22:18 gen-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 298K Jan 23 22:18 gen-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 308K Jan 23 22:18 gen-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 22:18 gen-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 22:18 gen-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 22:18 gen-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 314K Jan 23 22:18 gen-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 22:18 gen-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 22:18 gen-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 22:18 gen-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 22:18 gen-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 319K Jan 23 22:18 gen-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 22:18 gen-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 22:18 gen-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 22:18 
gen-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 22:18 gen-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 328K Jan 23 22:18 gen-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 55K Jan 23 22:18 gen-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 336K Jan 23 22:18 gen-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 341K Jan 23 22:18 gen-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 22:18 gen-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 22:18 gen-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 351K Jan 23 22:18 gen-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 22:18 gen-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 358K Jan 23 22:18 gen-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 22:18 gen-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 22:18 gen-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 362K Jan 23 22:18 gen-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 22:18 gen-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 22:18 gen-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 22:18 gen-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 22:18 gen-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 22:18 gen-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 58K Jan 23 22:18 gen-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 370K Jan 23 22:18 gen-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 22:18 gen-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 22:18 gen-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 22:18 gen-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 22:18 gen-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 388K Jan 23 22:18 gen-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 22:18 gen-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 22:18 gen-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 gen-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 gen-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 386K Jan 23 22:18 gen-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 gen-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 gen-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 
gen-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 gen-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 386K Jan 23 22:18 gen-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 gen-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 gen-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 gen-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 22:18 gen-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 404K Jan 23 22:18 gen-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 22:18 gen-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 22:18 gen-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 22:18 gen-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 22:18 gen-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 63K Jan 23 22:18 gen-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 411K Jan 23 22:18 gen-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 22:18 gen-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 22:18 gen-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 22:18 gen-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 415K Jan 23 22:18 gen-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 421K Jan 23 22:18 gen-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 22:18 gen-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 22:18 gen-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 22:18 gen-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 22:18 gen-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 435K Jan 23 22:18 gen-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 431K Jan 23 22:18 gen-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 22:18 gen-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 22:18 gen-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 22:18 gen-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 71K Jan 23 22:18 gen-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 444K Jan 23 22:18 gen-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.8M Jan 23 22:18 gen-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 458K Jan 23 22:18 gen-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 468K Jan 23 22:18 gen-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 466K Jan 23 22:18 gen-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 471K Jan 23 22:18 gen-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 76K Jan 23 22:18 gen-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 479K Jan 23 22:18 gen-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 486K Jan 23 22:18 gen-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 495K Jan 23 22:18 
gen-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 508K Jan 23 22:18 gen-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 499K Jan 23 22:18 gen-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 80K Jan 23 22:18 gen-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 521K Jan 23 22:18 gen-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 529K Jan 23 22:18 gen-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 533K Jan 23 22:18 gen-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 543K Jan 23 22:18 gen-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 548K Jan 23 22:18 gen-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 84K Jan 23 22:18 gen-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 553K Jan 23 22:18 gen-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 561K Jan 23 22:18 gen-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 568K Jan 23 22:18 gen-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 577K Jan 23 22:18 gen-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 583K Jan 23 22:18 gen-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 88K Jan 23 22:18 gen-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 595K Jan 23 22:18 gen-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 599K Jan 23 22:18 gen-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 612K Jan 23 22:18 gen-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 610K Jan 23 22:18 gen-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 618K Jan 23 22:18 gen-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 93K Jan 23 22:18 gen-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 623K Jan 23 22:18 gen-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 639K Jan 23 22:18 gen-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 639K Jan 23 22:18 gen-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 642K Jan 23 22:18 gen-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 668K Jan 23 22:18 gen-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 98K Jan 23 22:18 gen-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 662K Jan 23 22:18 gen-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 667K Jan 23 22:18 gen-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 672K Jan 23 22:18 gen-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 677K Jan 23 22:18 gen-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 700K Jan 23 22:18 gen-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 101K Jan 23 22:18 gen-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 695K Jan 23 22:18 gen-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 707K Jan 23 22:18 gen-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 719K Jan 23 22:18 gen-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 719K Jan 23 22:18 gen-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 733K Jan 23 22:18 gen-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 shuffle-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 22:18 shuffle-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 
shuffle-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 shuffle-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 shuffle-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 shuffle-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1480-count.jsonl\n", + "-rw-r--r-- 
1 root root 2.0M Jan 23 22:18 shuffle-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 22:18 shuffle-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 
shuffle-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 shuffle-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2290-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.0M Jan 23 22:18 shuffle-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 22:18 shuffle-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 
shuffle-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 shuffle-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3200-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.5M Jan 23 22:18 shuffle-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 shuffle-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 shuffle-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 
shuffle-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 shuffle-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root 
root 7.9M Jan 23 22:18 shuffle-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 shuffle-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 shuffle-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5825-count.jsonl\n", + 
"-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 shuffle-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 shuffle-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 shuffle-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 shuffle-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 shuffle-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 22:18 shuffle-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 22:18 shuffle-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-900-count.jsonl\n", + "-rw-r--r-- 
1 root root 2.0M Jan 23 22:18 shuffle-word-910-count.jsonl\n",
+      "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-920-count.jsonl\n",
+      "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-930-count.jsonl\n",
+      "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-940-count.jsonl\n",
+      "-rw-r--r-- 1 root root 2.8M Jan 23 22:18 shuffle-word-95-count.jsonl\n",
+      "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-950-count.jsonl\n",
+      "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-960-count.jsonl\n",
+      "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-970-count.jsonl\n",
+      "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-980-count.jsonl\n",
+      "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-990-count.jsonl\n",
+      "-rw-r--r-- 1 root root 12K Jan 23 22:18 word-2-count.jsonl\n",
+      "-rw-r--r-- 1 root root 15K Jan 23 22:18 word-4-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ./dataset\n",
+    "rm -rf ./dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word repetition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# Training set for <= 100 words\n",
+    "# This is used to fill up as many blanks as possible\n",
+    "#\n",
+    "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n",
+    "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n",
+    "for i in {5..100..5} \n",
+    "do\n",
+    "    python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n",
+    "    python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 100+ to 3000 words dataset\n",
+    "# \n",
+    "for i in {110..3000..10} \n",
+    "do\n",
+    "    python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 75 & \n",
+    "    python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 75 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 3000+ to 6000 words dataset\n",
+    "# \n",
+    "for i in {3025..6000..25} \n",
+    "do\n",
+    "    python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n",
+    "    python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -alh ./dataset/"
+   ]
+  },
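+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **Editor's note:** the `memory_script` generators invoked above are not part of this diff. The cell below is a minimal, *hypothetical* sketch of what a word-repetition prompt/completion generator along the lines of `gen_limited_prompt_completion_jsonl.py` might look like - the function name, prompt template and vocabulary are assumptions, not the actual implementation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical sketch of a word-repetition jsonl generator\n",
+    "# (illustration only, not the actual memory_script implementation)\n",
+    "import json\n",
+    "import random\n",
+    "\n",
+    "def gen_word_repetition_jsonl(out_path, word_count, num_samples):\n",
+    "    # Each sample asks the model to repeat back `word_count` random words,\n",
+    "    # mirroring the `<word count> <sample count>` CLI arguments used above\n",
+    "    vocab = [\"apple\", \"banana\", \"cherry\", \"delta\", \"echo\", \"foxtrot\"]\n",
+    "    with open(out_path, \"w\") as f:\n",
+    "        for _ in range(num_samples):\n",
+    "            words = \" \".join(random.choice(vocab) for _ in range(word_count))\n",
+    "            pair = {\"prompt\": \"Repeat: \" + words, \"completion\": \" \" + words}\n",
+    "            f.write(json.dumps(pair) + \"\\n\")\n",
+    "\n",
+    "# Example usage, matching the `10 words x 100 samples` pattern above:\n",
+    "# gen_word_repetition_jsonl(\"./dataset/gen-word-10-count.jsonl\", 10, 100)"
+   ]
+  },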
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "da287711",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Resolving data files: 100%|███████████████| 862/862 [00:00<00:00, 107543.06it/s]\n",
+      "Saving the dataset (4/4 shards): 100%|█| 36851/36851 [00:01<00:00, 19869.65 exam\n",
+      "Saving the dataset (1/1 shards): 100%|█| 1547/1547 [00:00<00:00, 30397.64 exampl\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's pre-tokenize the required dataset\n",
+    "# and pack the data into 8k length\n",
+    "#\n",
+    "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n",
+    "# Then to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n",
+    "#\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n",
+    "\n",
+    "# Ensure the checkpoint directory exists\n",
+    "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "07d1bf84",
+   "metadata": {},
+   "source": [
+    "## Finetune 2 (2x2k -> 2x4k) : The actual tune!"
+   ]
+  },
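+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **Editor's note:** for reference, the `accumulate_grad_batches` value in the trainer log below follows from the target batch size: with 1 node x 8 GPUs x `microbatch_size` 4, an effective batch size of 256 requires 256 / (1 x 8 x 4) = 8 gradient-accumulation steps. The cell below is a quick sanity check of those numbers (the formula is the standard grad-accumulation relationship, not code from this repo):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sanity check of the 'target_batch_size' numbers reported in the log below\n",
+    "target_batch_size = 256\n",
+    "num_nodes, num_devices, microbatch_size = 1, 8, 4\n",
+    "accumulate_grad_batches = target_batch_size // (num_nodes * num_devices * microbatch_size)\n",
+    "print(accumulate_grad_batches)  # -> 8\n",
+    "print(num_nodes * num_devices * microbatch_size * accumulate_grad_batches)  # -> 256 (effective batch size)"
+   ]
+  },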
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "03c6af10",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2024-01-23 22:32:27,860] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
+      "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-7B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-7B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.devices=auto', '--trainer.microbatch_size=4', '--model.ctx_len=8192'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-7B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-7B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.devices=auto', '--trainer.microbatch_size=4', '--model.ctx_len=8192'].\n",
+      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 1430867974\n",
+      "Seed set to 1430867974\n",
+      "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
+      " return self.fget.__get__(instance, owner)()\n",
+      "---\n",
+      "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n",
+      "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n",
+      "Detected CUDA files, patching ldflags\n",
+      "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n",
+      "Building extension module wkv5...\n",
+      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
+      "ninja: no work to do.\n",
+      "Loading extension module wkv5...\n",
+      "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n",
+      "---\n",
+      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\n",
+      "GPU available: True (cuda), used: True\n",
+      "TPU available: False, using: 0 TPU cores\n",
+      "IPU available: False, using: 0 IPUs\n",
+      "HPU available: False, using: 0 HPUs\n",
+      "\n",
+      "\n",
+      "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n",
+      " - target_batch_size: 256\n",
+      " - num_nodes: 1\n",
+      " - num_devices: 8\n",
+      " - microbatch_size: 4\n",
+      " - accumulate_grad_batches: 8\n",
+      " - effective_batch_size: 256\n",
+      "\n",
+      "[rank: 0] Seed set to 1430867974\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n",
+      "[2024-01-23 22:33:36,801] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[2024-01-23 22:33:36,802] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
+      "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
+      "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
+      "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
+      "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
+      "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
+      "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n",
+      "[rank: 3] Seed set to 1430867974\n",
+      "[rank: 5] Seed set to 1430867974\n",
+      "[rank: 2] Seed set to 1430867974\n",
+      "[rank: 6] Seed set to 1430867974\n",
+      "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
+      " return self.fget.__get__(instance, owner)()\n",
+      "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
+      " return self.fget.__get__(instance, owner)()\n",
+      "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 7] Seed set to 1430867974\n", + "[rank: 1] Seed set to 1430867974\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 4] Seed set to 1430867974\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 2] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 5] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 1] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 3] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 4] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 7] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 6] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240123_223508-bdvkilfd\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8xA100] RWKV-v5-7B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_2)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/bdvkilfd\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 2.000e-04 (0.0002)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.016431331634521484 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10186290740966797 seconds\n", + "Time to load fused_adam op: 0.10216116905212402 seconds\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10233497619628906 seconds\n", + "Time to load fused_adam op: 0.10216617584228516 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10257244110107422 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.1023705005645752 seconds\n", + "Time to load fused_adam op: 0.10263657569885254 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 268 M \n", + "1 | blocks | ModuleList | 7.0 B \n", + "2 | ln_out | LayerNorm | 8.2 K \n", + "3 | head | Linear | 268 M \n", + "--------------------------------------\n", + "7.5 B Trainable params\n", + "0 Non-trainable params\n", + "7.5 B Total params\n", + "30,072.177Total estimated model params size (MB)\n", + "Epoch 0: 17%|▏| 200/1152 [43:43<3:28:08, 0.08it/s, v_num=ilfd, train/loss=0.00/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|█| 1152/1152 [3:52:43<00:00, 0.08it/s, v_num=ilfd, train/loss=0.7\n", + "Validation: | | 0/? [00:00