diff --git a/RWKV-v5/config-example.yaml b/RWKV-v5/config-example.yaml index ca2564fd..28577f14 100644 --- a/RWKV-v5/config-example.yaml +++ b/RWKV-v5/config-example.yaml @@ -511,6 +511,16 @@ data: # this can be used together with sort_by_length, otherwise a shuffle will be done packing_in_sequence: False + # ---------------------------- + # Special use case flags + # ---------------------------- + + # Reverse the training dataset order before saving. This is useful for + # optimizing the dataset packing process when using packing_in_sequence + # together with sort_by_length in desc order + reverse_train_dataset_before_save: False + + # Path to the current checkpoint to continue training from # this should be the directory path, and ends with `.ckpt/` ckpt_path: null diff --git a/RWKV-v5/src/data.py b/RWKV-v5/src/data.py index ddf2c6df..7c75d65c 100644 --- a/RWKV-v5/src/data.py +++ b/RWKV-v5/src/data.py @@ -797,6 +797,13 @@ def merge_into_existing_samples(i): # Get the subset of the dataset src_dataset["train"] = src_dataset["train"].select(range(offset_val, offset_val + length_val)) + # Dataset flipping (if needed) + if kargs["reverse_train_dataset_before_save"]: + train_dataset = src_dataset["train"] + def reverse_dataset(x, idx): + return train_dataset[train_dataset.num_rows - idx - 1] + src_dataset["train"] = src_dataset["train"].map(reverse_dataset, with_indices=True, num_proc=num_cpus) + # Save the dataset to disk src_dataset.save_to_disk(kargs["data_path"]) @@ -961,6 +968,15 @@ def __init__( # this can be used together with sort_by_length, otherwise a shuffle will be done packing_in_sequence: bool = False, + # ---------------------------- + # Special use case flags + # ---------------------------- + + # Reverse the training dataset order before saving. This is useful for + # optimizing the dataset packing process when using packing_in_sequence + # together with sort_by_length in desc order + reverse_train_dataset_before_save: bool = False, + # ---------------------------- + # System tweaks + # ---------------------------- diff --git a/RWKV-v5/src/model.py b/RWKV-v5/src/model.py index 779b348a..e16dbfd5 100644 --- a/RWKV-v5/src/model.py +++ b/RWKV-v5/src/model.py @@ -1139,13 +1139,18 @@ def checkpointed_step(idx, targets, mask, last_shift_states, # https://lightning.ai/docs/pytorch/2.0.4/common/lightning_module.html#backward learning_loss = segment_train_loss / gradient_accumulation_steps - # Undocumented multiple backward pass support - # https://github.com/Lightning-AI/lightning/blob/678f642808c54e4c490caee4df5d357301c976bb/tests/trainer/optimization/test_manual_optimization.py#L251 - self.manual_backward(learning_loss, optimizer, retain_graph=True) - - # Accumulate without gradient, as we already did the backward pass - # This does mean, that a single backward pass is "wasted" at the end - training_loss = training_loss + segment_train_loss.clone().detach().requires_grad_(False) + # Perform the backward pass for the learning segments, handling the last segment separately + if i == start_learning_segment + backward_segment_count - 1: + # This is the last backward pass, so we let the default pytorch lightning handle the backward pass + # and return the segment loss as part of the total loss + training_loss = training_loss + segment_train_loss + else: + # Undocumented multiple backward pass support + # https://github.com/Lightning-AI/lightning/blob/678f642808c54e4c490caee4df5d357301c976bb/tests/trainer/optimization/test_manual_optimization.py#L251 + self.manual_backward(learning_loss, optimizer, 
retain_graph=True) + + # Accumulate without gradient, as we already did the backward pass + training_loss = training_loss + segment_train_loss.clone().detach().requires_grad_(False) else: # Even if its not the segments we use for backward pass, we still need to accumulate the loss training_loss = training_loss + segment_train_loss.clone().detach().requires_grad_(False) @@ -1234,7 +1239,7 @@ def checkpointed_step(idx, targets, mask, last_shift_states, # Throw if total loss is NaN assert not torch.isnan(training_loss), "training_loss is NaN" - return training_loss + return sampling_loss, training_loss # # Training and validation steps # @@ -1244,9 +1249,9 @@ def training_step(self, batch, batch_idx): # print("=== BATCH ID SHAPE ===", batch["input_ids"].shape) # print("=== BATCH AM SHAPE ===", batch["attention_mask"].shape) - total_loss = self.compute_loss(batch, batch_idx, True) + sampling_loss, training_loss = self.compute_loss(batch, batch_idx, True) - self.log('train/loss', total_loss, prog_bar=True) + self.log('train/loss', training_loss, prog_bar=True) # If set - forces the above train/loss log line to always be on a new line if self.substep_logging: print("") @@ -1256,21 +1261,21 @@ torch.cuda.empty_cache() # if loss not a number return None - if torch.isnan(total_loss): + if torch.isnan(training_loss): return None - return total_loss + return training_loss @TCompileBaseline def validation_step(self, batch, batch_idx): - total_loss = self.compute_loss(batch, batch_idx, False) - self.log('validation/loss', total_loss, prog_bar=True, sync_dist=True) + sampling_loss, training_loss = self.compute_loss(batch, batch_idx, False) + self.log('validation/loss', sampling_loss, prog_bar=True, sync_dist=True) # Reset the token tracking accordingly self._counting_tokens = 0 self._counting_time_start = time.time() - return total_loss + return sampling_loss ### --- # SimpleRWKV, a wrapper for RWKV that allows for simple usage of the model diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb new file mode 100644 index 00000000..9a9cf515 --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb @@ -0,0 +1,3137 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "ecd2a3ff", + "metadata": { + "papermill": { + "duration": 0.003441, + "end_time": "2024-01-23T13:12:26.844416", + "exception": false, + "start_time": "2024-01-23T13:12:26.840975", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV World Memory Finetune (Memory Finetune)\n", + "\n", + "This takes an existing RWKV world model, and finetunes it specifically for the memory repeat task of various sizes.\n", + "This test is used as an approximation of testing the model's token memory size in the \"worst case scenario\"\n", + "\n", + "- Using randomized data, so prior learning does not help, nor is it possible to compress the data\n", + "- Using a variety of token lengths, to avoid overfitting to a single length\n", + "- Based on the pretrained model (rwkv world)\n", + "- This process does \"destroy the model\" but it helps quantify the model limits\n", + "\n", + "In practice however, the model may show an \"attention range\" longer than what is benchmarked, as natural text is highly compressible. 
This is unlike the pure randomized data that was being tested here.\n", + "\n", + "This runner has been optimized to run on 8 x 80GB vram nodes; you should allocate at least 1TB of disk space.\n", + "\n", + "> This project assumes you have the rwkv-infctx conda env setup, and you are executing in that environment - see the main README.md for the conda env setup steps" + ] + }, + { + "cell_type": "markdown", + "id": "5a185acc", + "metadata": { + "papermill": { + "duration": 0.00437, + "end_time": "2024-01-23T13:12:26.854086", + "exception": false, + "start_time": "2024-01-23T13:12:26.849716", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Configure your environment settings\n", + "(!Important: you will need to rerun the below cell if you restart your kernel)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f8b3c0c6", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:26.859575Z", + "iopub.status.busy": "2024-01-23T13:12:26.859381Z", + "iopub.status.idle": "2024-01-23T13:12:26.871264Z", + "shell.execute_reply": "2024-01-23T13:12:26.870420Z" + }, + "papermill": { + "duration": 0.017126, + "end_time": "2024-01-23T13:12:26.873493", + "exception": false, + "start_time": "2024-01-23T13:12:26.856367", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "WANDB_PREFIX=\"[8xA100] RWKV-v5-1B5-World\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# The model sizing\n", + "MODEL_NAME=\"RWKV-v5-1B5-world.pth\"\n", + "MODEL_URL=\"https://huggingface.co/BlinkDL/rwkv-5-world/resolve/main/RWKV-5-World-1B5-v2-20231025-ctx4096.pth?download=true\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "MEMORY_SCRIPT_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./notebook/util-scripts/memory_script\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "markdown", + "id": "6adf698d", + "metadata": { + "papermill": { + "duration": 0.002976, + "end_time": "2024-01-23T13:12:26.883554", + "exception": false, + "start_time": "2024-01-23T13:12:26.880578", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Download the pretrained model\n", + "(if you want to skip the basemodel train + instruct tune)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b64f1b0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:26.889176Z", + "iopub.status.busy": "2024-01-23T13:12:26.888991Z", + "iopub.status.idle": 
"2024-01-23T13:12:27.384385Z", + "shell.execute_reply": "2024-01-23T13:12:27.382982Z" + }, + "papermill": { + "duration": 0.501114, + "end_time": "2024-01-23T13:12:27.387069", + "exception": false, + "start_time": "2024-01-23T13:12:26.885955", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets wget the model files\n", + "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n", + "!cd \"{PROJECT_DIR}/model\" && \\\n", + " wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f73ffd44", + "metadata": { + "papermill": { + "duration": 0.005131, + "end_time": "2024-01-23T13:12:27.397590", + "exception": false, + "start_time": "2024-01-23T13:12:27.392459", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 2x2k) : Dataset preperation\n", + "\n", + "Stage 1, handles total context size of 2048. Meaning it will be tuned for memory task of 1 to approximately 1024 tokens of size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14058c3c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:27.404298Z", + "iopub.status.busy": "2024-01-23T13:12:27.403896Z", + "iopub.status.idle": "2024-01-23T13:12:31.171512Z", + "shell.execute_reply": "2024-01-23T13:12:31.170138Z" + }, + "papermill": { + "duration": 3.774062, + "end_time": "2024-01-23T13:12:31.174157", + "exception": false, + "start_time": "2024-01-23T13:12:27.400095", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Folder and eval pip setup\n", + "!cp -r \"{MEMORY_SCRIPT_DIR}/\" \"{NOTEBOOK_DIR}/\"\n", + "!python3 -m pip install rwkv asyncio aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75bb2cc2", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:31.185716Z", + "iopub.status.busy": "2024-01-23T13:12:31.185310Z", + "iopub.status.idle": "2024-01-23T13:12:34.675107Z", + "shell.execute_reply": "2024-01-23T13:12:34.673742Z" + }, + "papermill": { + "duration": 3.524274, + "end_time": "2024-01-23T13:12:34.704500", + "exception": false, + "start_time": "2024-01-23T13:12:31.180226", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# Training set for < 100 words\n", + "# This is used to fill up as much blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 150 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 50+ - 400 words dataset\n", + "# \n", + "for i in {110..200..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 125 & \n", + " python 
./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 50+ - 400 words dataset\n", + "# \n", + "for i in {210..4000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db71b4d3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:12:34.941925Z", + "iopub.status.busy": "2024-01-23T13:12:34.941597Z", + "iopub.status.idle": "2024-01-23T13:15:24.022937Z", + "shell.execute_reply": "2024-01-23T13:15:24.021071Z" + }, + "papermill": { + "duration": 169.166276, + "end_time": "2024-01-23T13:15:24.026023", + "exception": false, + "start_time": "2024-01-23T13:12:34.859747", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets pre tokenize the requried dataset\n", + "# and pack the data into 8k of length\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# Then to do 8k chunks, batchsize 8, with 16k datapacks. Why? I dun know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-1-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-1-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "8fa689d3", + "metadata": { + "papermill": { + "duration": 0.113732, + "end_time": "2024-01-23T13:15:24.262597", + "exception": false, + "start_time": "2024-01-23T13:15:24.148865", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 2x2k) : The actual tune!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffc77911", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:15:24.493500Z", + "iopub.status.busy": "2024-01-23T13:15:24.493066Z", + "iopub.status.idle": "2024-01-23T14:05:58.422960Z", + "shell.execute_reply": "2024-01-23T14:05:58.420918Z" + }, + "papermill": { + "duration": 3034.049363, + "end_time": "2024-01-23T14:05:58.426231", + "exception": false, + "start_time": "2024-01-23T13:15:24.376868", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-1-tune.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-1 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=8 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "599687e8", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T14:05:58.744066Z", + "iopub.status.busy": "2024-01-23T14:05:58.743461Z", + "iopub.status.idle": "2024-01-23T14:06:16.527156Z", + "shell.execute_reply": "2024-01-23T14:06:16.525668Z" + }, + "papermill": { + "duration": 17.942794, + "end_time": "2024-01-23T14:06:16.530004", + "exception": false, + "start_time": "2024-01-23T14:05:58.587210", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7211d2f3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T14:08:07.728497Z", + "iopub.status.busy": "2024-01-23T14:08:07.728014Z", + "iopub.status.idle": "2024-01-23T14:14:00.936308Z", + "shell.execute_reply": "2024-01-23T14:14:00.934930Z" + }, + "papermill": { + "duration": 353.378901, + "end_time": "2024-01-23T14:14:00.939288", + "exception": false, + "start_time": "2024-01-23T14:08:07.560387", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\" \"none\" 1000 3000" + ] + }, + { + "cell_type": "markdown", + "id": "8259d9b5", + "metadata": {}, + "source": [ + "## Finetune 1 (0 -> 2x2k) : The actual tune!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b7f8a18", + "metadata": {}, + "outputs": [], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-1-tune.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-1 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=4 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f35d6f67", + "metadata": {}, + "outputs": [], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db5eef37", + "metadata": {}, + "outputs": [], + "source": [ + "# Lets do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\" \"none\" 1000 3000" + ] + }, + { + "cell_type": "markdown", + "id": "6fe5d71b", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : Dataset preperation\n", + "\n", + "Stage 2, handles total context size of 8k. Meaning it will be tuned for memory task of approximately 4k tokens of size." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0f54c3e1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n", + "Generated JSONL file with - 2 max words, 100 samples - at ./dataset/word-2-count.jsonl\n", + "Generated JSONL file with - 10 max words, 100 samples - at ./dataset/gen-word-10-count.jsonl\n", + "Generated JSONL file with - 15 max words, 100 samples - at ./dataset/gen-word-15-count.jsonl\n", + "Generated JSONL file with - 20 max words, 100 samples - at ./dataset/gen-word-20-count.jsonl\n", + "Generated JSONL file with - 25 max words, 100 samples - at ./dataset/gen-word-25-count.jsonl\n", + "Generated JSONL file with - 4 max words, 100 samples - at ./dataset/word-4-count.jsonl\n", + "Generated JSONL file with - 5 max words, 100 samples - at ./dataset/gen-word-5-count.jsonl\n", + "Generated JSONL file with - 55 max words, 100 samples - at ./dataset/gen-word-55-count.jsonl\n", + "Generated JSONL file with - 45 max words, 100 samples - at ./dataset/gen-word-45-count.jsonlGenerated JSONL file with - 50 max words, 100 samples - at ./dataset/gen-word-50-count.jsonl\n", + "Generated JSONL file with - 70 max words, 100 samples - at ./dataset/gen-word-70-count.jsonl\n", + "\n", + "Generated JSONL file with - 30 max words, 100 samples - at ./dataset/gen-word-30-count.jsonl\n", + "Generated JSONL file with - 65 max words, 100 samples - at ./dataset/gen-word-65-count.jsonl\n", + "Generated JSONL file with - 60 max words, 100 samples - at ./dataset/gen-word-60-count.jsonl\n", + "Generated JSONL file with - 35 max words, 100 samples - at ./dataset/gen-word-35-count.jsonlGenerated JSONL file with - 90 max words, 100 samples - at ./dataset/gen-word-90-count.jsonl\n", + "\n", + "Generated JSONL file with - 40 max words, 100 samples - at ./dataset/gen-word-40-count.jsonl\n", + "Generated JSONL file with - 80 max words, 100 samples - at ./dataset/gen-word-80-count.jsonl\n", + "Generated JSONL file with - 85 max words, 100 samples - at ./dataset/gen-word-85-count.jsonl\n", + "Generated JSONL file with - 75 max words, 100 samples - at ./dataset/gen-word-75-count.jsonl\n", + "Generated JSONL file with - 120 max words, 75 samples - at ./dataset/gen-word-120-count.jsonl\n", + "Generated JSONL file with - 180 max words, 75 samples - at ./dataset/gen-word-180-count.jsonl\n", + "Generated JSONL file with - 210 max words, 75 samples - at ./dataset/gen-word-210-count.jsonl\n", + "Generated JSONL file with - 130 max words, 75 samples - at ./dataset/gen-word-130-count.jsonl\n", + "Generated JSONL file with - 170 max words, 75 samples - at ./dataset/gen-word-170-count.jsonl\n", + "Generated JSONL file with - 340 max words, 75 samples - at ./dataset/gen-word-340-count.jsonl\n", + "Generated JSONL file with - 150 max words, 75 samples - at ./dataset/gen-word-150-count.jsonl\n", + "Generated JSONL file with - 460 max words, 75 samples - at ./dataset/gen-word-460-count.jsonl\n", + "Generated JSONL file with - 510 max words, 75 samples - at ./dataset/gen-word-510-count.jsonl\n", + "Generated JSONL file with - 95 max words, 100 samples - at ./dataset/gen-word-95-count.jsonl\n", + "Generated JSONL file with - 520 max words, 75 samples - at ./dataset/gen-word-520-count.jsonl\n", + "Generated JSONL file with - 350 max words, 75 samples - at ./dataset/gen-word-350-count.jsonl\n", + "Generated JSONL file with - 430 max words, 75 samples - at ./dataset/gen-word-430-count.jsonl\n", + "Generated JSONL file with - 
470 max words, 75 samples - at ./dataset/gen-word-470-count.jsonl\n", + "Generated JSONL file with - 330 max words, 75 samples - at ./dataset/gen-word-330-count.jsonl\n", + "Generated JSONL file with - 100 max words, 100 samples - at ./dataset/gen-word-100-count.jsonl\n", + "Generated JSONL file with - 220 max words, 75 samples - at ./dataset/gen-word-220-count.jsonl\n", + "Generated JSONL file with - 280 max words, 75 samples - at ./dataset/gen-word-280-count.jsonl\n", + "Generated JSONL file with - 240 max words, 75 samples - at ./dataset/gen-word-240-count.jsonl\n", + "Generated JSONL file with - 160 max words, 75 samples - at ./dataset/gen-word-160-count.jsonl\n", + "Generated JSONL file with - 290 max words, 75 samples - at ./dataset/gen-word-290-count.jsonl\n", + "Generated JSONL file with - 200 max words, 75 samples - at ./dataset/gen-word-200-count.jsonl\n", + "Generated JSONL file with - 390 max words, 75 samples - at ./dataset/gen-word-390-count.jsonl\n", + "Generated JSONL file with - 320 max words, 75 samples - at ./dataset/gen-word-320-count.jsonl\n", + "Generated JSONL file with - 550 max words, 75 samples - at ./dataset/gen-word-550-count.jsonl\n", + "Generated JSONL file with - 190 max words, 75 samples - at ./dataset/gen-word-190-count.jsonlGenerated JSONL file with - 450 max words, 75 samples - at ./dataset/gen-word-450-count.jsonl\n", + "\n", + "Generated JSONL file with - 540 max words, 75 samples - at ./dataset/gen-word-540-count.jsonl\n", + "Generated a single JSONL file with 1290 samples (75 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", + "Generated JSONL file with - 560 max words, 75 samples - at ./dataset/gen-word-560-count.jsonl\n", + "Generated JSONL file with - 600 max words, 75 samples - at ./dataset/gen-word-600-count.jsonl\n", + "Generated JSONL file with - 440 max words, 75 samples - at ./dataset/gen-word-440-count.jsonl\n", + "Generated JSONL file with - 360 max words, 75 samples - at ./dataset/gen-word-360-count.jsonl\n", + "Generated a single JSONL file with 689 samples (75 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", + "Generated JSONL file with - 570 max words, 75 samples - at ./dataset/gen-word-570-count.jsonl\n", + "Generated a single JSONL file with 1387 samples (75 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", + "Generated JSONL file with - 530 max words, 75 samples - at ./dataset/gen-word-530-count.jsonl\n", + "Generated JSONL file with - 380 max words, 75 samples - at ./dataset/gen-word-380-count.jsonl\n", + "Generated a single JSONL file with 1155 samples (75 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", + "Generated JSONL file with - 490 max words, 75 samples - at ./dataset/gen-word-490-count.jsonl\n", + "Generated JSONL file with - 270 max words, 75 samples - at ./dataset/gen-word-270-count.jsonl\n", + "Generated JSONL file with - 620 max words, 75 samples - at ./dataset/gen-word-620-count.jsonl\n", + "Generated a single JSONL file with 438 samples (75 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", + "Generated a single JSONL file with 694 samples (75 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", + "Generated a single JSONL file with 1025 samples (75 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", + "Generated a single JSONL file with 1099 samples (75 token repeat) - 160 max words - at 
./dataset/shuffle-word-160-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", + "Generated a single JSONL file with 1058 samples (75 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", + "Generated JSONL file with - 780 max words, 75 samples - at ./dataset/gen-word-780-count.jsonl\n", + "Generated JSONL file with - 820 max words, 75 samples - at ./dataset/gen-word-820-count.jsonl\n", + "Generated JSONL file with - 480 max words, 75 samples - at ./dataset/gen-word-480-count.jsonl\n", + "Generated JSONL file with - 260 max words, 75 samples - at ./dataset/gen-word-260-count.jsonl\n", + "Generated a single JSONL file with 754 samples (75 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", + "Generated a single JSONL file with 685 samples (75 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", + "Generated a single JSONL file with 2635 samples (100 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", + "Generated a single JSONL file with 442 samples (75 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", + "Generated a single JSONL file with 530 samples (75 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", + "Generated a single JSONL file with 527 samples (75 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", + "Generated JSONL file with - 250 max words, 75 samples - at ./dataset/gen-word-250-count.jsonl\n", + "Generated JSONL file with - 960 max words, 75 samples - at ./dataset/gen-word-960-count.jsonl\n", + "Generated a single JSONL file with 443 samples (75 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", + "Generated a single JSONL file with 446 samples (75 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", + "Generated JSONL file with - 370 max words, 75 samples - at ./dataset/gen-word-370-count.jsonl\n", + "Generated a single JSONL file with 530 samples (75 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", + "Generated JSONL file with - 310 max words, 75 samples - at ./dataset/gen-word-310-count.jsonl\n", + "Generated a single JSONL file with 3558 samples (100 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", + "Generated a single JSONL file with 2945 samples (100 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", + "Generated a single JSONL file with 443 samples (75 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", + "Generated JSONL file with - 110 max words, 75 samples - at ./dataset/gen-word-110-count.jsonl\n", + "Generated JSONL file with - 640 max words, 75 samples - at ./dataset/gen-word-640-count.jsonl\n", + "Generated a single JSONL file with 525 samples (75 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", + "Generated JSONL file with - 230 max words, 75 samples - at ./dataset/gen-word-230-count.jsonl\n", + "Generated JSONL file with - 590 max words, 75 samples - at ./dataset/gen-word-590-count.jsonl\n", + "Generated a single JSONL file with 1030 samples 
(75 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", + "Generated JSONL file with - 660 max words, 75 samples - at ./dataset/gen-word-660-count.jsonl\n", + "Generated JSONL file with - 580 max words, 75 samples - at ./dataset/gen-word-580-count.jsonl\n", + "Generated JSONL file with - 500 max words, 75 samples - at ./dataset/gen-word-500-count.jsonl\n", + "Generated JSONL file with - 300 max words, 75 samples - at ./dataset/gen-word-300-count.jsonl\n", + "Generated a single JSONL file with 4085 samples (100 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonl\n", + "Generated a single JSONL file with 535 samples (75 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", + "Generated JSONL file with - 650 max words, 75 samples - at ./dataset/gen-word-650-count.jsonl\n", + "Generated JSONL file with - 670 max words, 75 samples - at ./dataset/gen-word-670-count.jsonl\n", + "Generated JSONL file with - 140 max words, 75 samples - at ./dataset/gen-word-140-count.jsonl\n", + "Generated a single JSONL file with 2789 samples (100 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n", + "Generated JSONL file with - 610 max words, 75 samples - at ./dataset/gen-word-610-count.jsonl\n", + "Generated a single JSONL file with 728 samples (75 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", + "Generated a single JSONL file with 691 samples (75 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", + "Generated a single JSONL file with 3791 samples (100 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", + "Generated JSONL file with - 690 max words, 75 samples - at ./dataset/gen-word-690-count.jsonl\n", + "Generated a single JSONL file with 5899 samples (100 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", + "Generated a single JSONL file with 5249 samples (100 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", + "Generated a single JSONL file with 3299 samples (100 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", + "Generated JSONL file with - 410 max words, 75 samples - at ./dataset/gen-word-410-count.jsonl\n", + "Generated JSONL file with - 630 max words, 75 samples - at ./dataset/gen-word-630-count.jsonl\n", + "Generated JSONL file with - 880 max words, 75 samples - at ./dataset/gen-word-880-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", + "Generated JSONL file with - 420 max words, 75 samples - at ./dataset/gen-word-420-count.jsonl\n", + "Generated JSONL file with - 830 max words, 
75 samples - at ./dataset/gen-word-830-count.jsonl\n", + "Generated JSONL file with - 700 max words, 75 samples - at ./dataset/gen-word-700-count.jsonl\n", + "Generated a single JSONL file with 707 samples (75 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", + "Generated a single JSONL file with 7559 samples (100 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", + "Generated JSONL file with - 890 max words, 75 samples - at ./dataset/gen-word-890-count.jsonl\n", + "Generated a single JSONL file with 372 samples (75 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", + "Generated a single JSONL file with 4390 samples (100 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", + "Generated a single JSONL file with 1494 samples (75 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 390 max words - at ./dataset/shuffle-word-390-count.jsonl\n", + "Generated a single JSONL file with 4825 samples (100 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", + "Generated a single JSONL file with 443 samples (75 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", + "Generated JSONL file with - 860 max words, 75 samples - at ./dataset/gen-word-860-count.jsonl\n", + "Generated JSONL file with - 790 max words, 75 samples - at ./dataset/gen-word-790-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", + "Generated a single JSONL file with 6532 samples (100 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", + "Generated JSONL file with - 400 max words, 75 samples - at ./dataset/gen-word-400-count.jsonl\n", + "Generated a single JSONL file with 434 samples (75 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", + "Generated JSONL file with - 1190 max words, 75 samples - at ./dataset/gen-word-1190-count.jsonl\n", + "Generated a single JSONL file with 8753 samples (100 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", + "Generated a single JSONL file with 793 samples (75 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", + "Generated a single JSONL file with 749 samples (75 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", + "Generated a single JSONL file with 1006 samples (75 token repeat) - 200 max words - at 
./dataset/shuffle-word-200-count.jsonl\n", + "Generated a single JSONL file with 1217 samples (75 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", + "Generated JSONL file with - 850 max words, 75 samples - at ./dataset/gen-word-850-count.jsonl\n", + "Generated JSONL file with - 1350 max words, 75 samples - at ./dataset/gen-word-1350-count.jsonl\n", + "Generated JSONL file with - 810 max words, 75 samples - at ./dataset/gen-word-810-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", + "Generated JSONL file with - 970 max words, 75 samples - at ./dataset/gen-word-970-count.jsonl\n", + "Generated a single JSONL file with 3132 samples (100 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", + "Generated JSONL file with - 1060 max words, 75 samples - at ./dataset/gen-word-1060-count.jsonl\n", + "Generated JSONL file with - 680 max words, 75 samples - at ./dataset/gen-word-680-count.jsonl\n", + "Generated a single JSONL file with 304 samples (75 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", + "Generated a single JSONL file with 442 samples (75 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", + "Generated JSONL file with - 800 max words, 75 samples - at ./dataset/gen-word-800-count.jsonl\n", + "Generated a single JSONL file with 196 samples (75 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", + "Generated a single JSONL file with 686 samples (75 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", + "Generated a single JSONL file with 13087 samples (100 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", + "Generated JSONL file with - 740 max words, 75 samples - at ./dataset/gen-word-740-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", + "Generated a single JSONL file with 531 samples (75 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", + "Generated JSONL file with - 910 max words, 75 samples - at ./dataset/gen-word-910-count.jsonl\n", + "Generated JSONL file with - 1750 max words, 75 samples - at ./dataset/gen-word-1750-count.jsonl\n", + "Generated JSONL file with - 1030 max words, 75 samples - at ./dataset/gen-word-1030-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", + "Generated JSONL file with - 760 max words, 75 samples - at ./dataset/gen-word-760-count.jsonl\n", + "Generated JSONL file with - 710 max 
words, 75 samples - at ./dataset/gen-word-710-count.jsonl\n", + "Generated JSONL file with - 1280 max words, 75 samples - at ./dataset/gen-word-1280-count.jsonl\n", + "Generated a single JSONL file with 10610 samples (100 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n", + "Generated a single JSONL file with 236 samples (75 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", + "Generated JSONL file with - 770 max words, 75 samples - at ./dataset/gen-word-770-count.jsonl\n", + "Generated JSONL file with - 900 max words, 75 samples - at ./dataset/gen-word-900-count.jsonl\n", + "Generated JSONL file with - 1700 max words, 75 samples - at ./dataset/gen-word-1700-count.jsonl\n", + "Generated JSONL file with - 1110 max words, 75 samples - at ./dataset/gen-word-1110-count.jsonl\n", + "Generated JSONL file with - 1020 max words, 75 samples - at ./dataset/gen-word-1020-count.jsonl\n", + "Generated JSONL file with - 990 max words, 75 samples - at ./dataset/gen-word-990-count.jsonl\n", + "Generated a single JSONL file with 237 samples (75 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", + "Generated a single JSONL file with 237 samples (75 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", + "Generated JSONL file with - 870 max words, 75 samples - at ./dataset/gen-word-870-count.jsonl\n", + "Generated JSONL file with - 1050 max words, 75 samples - at ./dataset/gen-word-1050-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", + "Generated JSONL file with - 1100 max words, 75 samples - at ./dataset/gen-word-1100-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", + "Generated JSONL file with - 1070 max words, 75 samples - at ./dataset/gen-word-1070-count.jsonl\n", + "Generated JSONL file with - 1120 max words, 75 samples - at ./dataset/gen-word-1120-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", + "Generated a single JSONL file with 230 samples (75 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", + "Generated JSONL file with - 1210 max words, 75 samples - at ./dataset/gen-word-1210-count.jsonl\n", + "Generated JSONL file with - 1150 max words, 75 samples - at ./dataset/gen-word-1150-count.jsonl\n", + "Generated JSONL file with - 1370 max words, 75 samples - at ./dataset/gen-word-1370-count.jsonl\n", + "Generated JSONL file with - 720 max words, 75 samples - at ./dataset/gen-word-720-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", + "Generated JSONL file with - 750 max words, 75 samples - at ./dataset/gen-word-750-count.jsonl\n", + "Generated JSONL file with - 2970 max words, 75 samples - at ./dataset/gen-word-2970-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", + "Generated a single JSONL file with 297 samples (75 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token 
repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", + "Generated JSONL file with - 1040 max words, 75 samples - at ./dataset/gen-word-1040-count.jsonl\n", + "Generated a single JSONL file with 17810 samples (100 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", + "Generated a single JSONL file with 184 samples (75 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", + "Generated a single JSONL file with 26091 samples (100 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n", + "Generated JSONL file with - 940 max words, 75 samples - at ./dataset/gen-word-940-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 800 max words - at ./dataset/shuffle-word-800-count.jsonl\n", + "Generated a single JSONL file with 243 samples (75 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", + "Generated JSONL file with - 1010 max words, 75 samples - at ./dataset/gen-word-1010-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonl\n", + "Generated JSONL file with - 1160 max words, 75 samples - at ./dataset/gen-word-1160-count.jsonl\n", + "Generated JSONL file with - 1000 max words, 75 samples - at ./dataset/gen-word-1000-count.jsonl\n", + "Generated JSONL file with - 840 max words, 75 samples - at ./dataset/gen-word-840-count.jsonl\n", + "Generated JSONL file with - 930 max words, 75 samples - at ./dataset/gen-word-930-count.jsonl\n", + "Generated JSONL file with - 980 max words, 75 samples - at ./dataset/gen-word-980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1550 max words - at ./dataset/shuffle-word-1550-count.jsonl\n", + "Generated JSONL file with - 730 max words, 75 samples - at ./dataset/gen-word-730-count.jsonl\n", + "Generated JSONL file with - 920 max words, 75 samples - at ./dataset/gen-word-920-count.jsonl\n", + "Generated JSONL file with - 1140 max words, 75 samples - at ./dataset/gen-word-1140-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", + "Generated JSONL file with - 1170 max words, 75 samples - at ./dataset/gen-word-1170-count.jsonl\n", + "Generated JSONL file with - 1220 max words, 75 samples - at ./dataset/gen-word-1220-count.jsonl\n", + "Generated a single JSONL file with 223 samples (75 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", + "Generated a single 
JSONL file with 100 samples (100 token repeat) - 4650 max words - at ./dataset/shuffle-word-4650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 600 max words - at ./dataset/shuffle-word-600-count.jsonl\n", + "Generated JSONL file with - 1130 max words, 75 samples - at ./dataset/gen-word-1130-count.jsonl\n", + "Generated a single JSONL file with 231 samples (75 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", + "Generated JSONL file with - 1090 max words, 75 samples - at ./dataset/gen-word-1090-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", + "Generated JSONL file with - 1180 max words, 75 samples - at ./dataset/gen-word-1180-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", + "Generated JSONL file with - 950 max words, 75 samples - at ./dataset/gen-word-950-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", + "Generated a single JSONL file with 234 samples (75 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", + "Generated a single JSONL file with 240 samples (75 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", + "Generated JSONL file with - 1200 max words, 75 samples - at ./dataset/gen-word-1200-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5675 max words - at ./dataset/shuffle-word-5675-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", + "Generated JSONL file with - 4675 max words, 100 samples - at ./dataset/gen-word-4675-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", + "Generated JSONL file with - 1230 max words, 75 samples - at ./dataset/gen-word-1230-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", + "Generated a single JSONL file with 223 samples (75 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1190 max words - at ./dataset/shuffle-word-1190-count.jsonl\n", + "Generated JSONL file with - 1250 max words, 75 samples - 
at ./dataset/gen-word-1250-count.jsonl\n", + "Generated JSONL file with - 1080 max words, 75 samples - at ./dataset/gen-word-1080-count.jsonl\n", + "Generated JSONL file with - 1490 max words, 75 samples - at ./dataset/gen-word-1490-count.jsonl\n", + "Generated a single JSONL file with 184 samples (75 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", + "Generated a single JSONL file with 188 samples (75 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", + "Generated a single JSONL file with 184 samples (75 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", + "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", + "Generated JSONL file with - 4850 max words, 100 samples - at ./dataset/gen-word-4850-count.jsonlGenerated JSONL file with - 1240 max words, 75 samples - at ./dataset/gen-word-1240-count.jsonl\n", + "\n", + "Generated JSONL file with - 4050 max words, 100 samples - at ./dataset/gen-word-4050-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", + "Generated JSONL file with - 1560 max words, 75 samples - at ./dataset/gen-word-1560-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", + "Generated JSONL file with - 2100 max words, 75 samples - at ./dataset/gen-word-2100-count.jsonl\n", + "Generated JSONL file with - 1270 max words, 75 samples - at ./dataset/gen-word-1270-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", + "Generated JSONL file with - 1950 max words, 75 samples - at ./dataset/gen-word-1950-count.jsonl\n", + "Generated JSONL file with - 1260 max words, 75 samples - at ./dataset/gen-word-1260-count.jsonl\n", + "Generated JSONL file with - 1290 max words, 75 samples - at ./dataset/gen-word-1290-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", + "Generated a single JSONL file with 191 samples (75 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", + "Generated JSONL file with - 1320 max words, 75 samples - at ./dataset/gen-word-1320-count.jsonlGenerated JSONL file with - 2650 max words, 75 samples - at ./dataset/gen-word-2650-count.jsonl\n", + "\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token 
repeat) - 1930 max words - at ./dataset/shuffle-word-1930-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n", + "Generated a single JSONL file with 140 samples (75 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n", + "Generated JSONL file with - 2660 max words, 75 samples - at ./dataset/gen-word-2660-count.jsonl\n", + "Generated a single JSONL file with 114 samples (75 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", + "Generated a single JSONL file with 188 samples (75 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", + "Generated a single JSONL file with 186 samples (75 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", + "Generated a single JSONL file with 154 samples (75 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n", + "Generated JSONL file with - 1330 max words, 75 samples - at ./dataset/gen-word-1330-count.jsonl\n", + "Generated a single JSONL file with 186 samples (75 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", + "Generated a single JSONL file with 147 samples (75 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", + "Generated JSONL file with - 1310 max words, 75 samples - at ./dataset/gen-word-1310-count.jsonl\n", + "Generated JSONL file with - 1340 max words, 75 samples - at ./dataset/gen-word-1340-count.jsonl\n", + "Generated JSONL file with - 1380 max words, 75 samples - at ./dataset/gen-word-1380-count.jsonl\n", + "Generated JSONL file with - 1440 max words, 75 samples - at ./dataset/gen-word-1440-count.jsonl\n", + "Generated JSONL file with - 1400 max words, 75 samples - at ./dataset/gen-word-1400-count.jsonl\n", + "Generated JSONL file with - 1360 max words, 75 samples - at ./dataset/gen-word-1360-count.jsonl\n", + "Generated JSONL file with - 1300 max words, 75 samples - at ./dataset/gen-word-1300-count.jsonl\n", + "Generated JSONL file with - 1390 max words, 75 samples - at ./dataset/gen-word-1390-count.jsonl\n", + "Generated JSONL file with - 1410 max words, 75 samples - at ./dataset/gen-word-1410-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonl\n", + "Generated JSONL file with - 1420 max words, 75 samples - at ./dataset/gen-word-1420-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", + "Generated JSONL file with - 1430 max words, 75 samples - at ./dataset/gen-word-1430-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", 
+ "Generated a single JSONL file with 152 samples (75 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", + "Generated JSONL file with - 1450 max words, 75 samples - at ./dataset/gen-word-1450-count.jsonl\n", + "Generated JSONL file with - 1480 max words, 75 samples - at ./dataset/gen-word-1480-count.jsonl\n", + "Generated a single JSONL file with 183 samples (75 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", + "Generated a single JSONL file with 55738 samples (100 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", + "Generated JSONL file with - 1520 max words, 75 samples - at ./dataset/gen-word-1520-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n", + "Generated JSONL file with - 5550 max words, 100 samples - at ./dataset/gen-word-5550-count.jsonl\n", + "Generated JSONL file with - 1460 max words, 75 samples - at ./dataset/gen-word-1460-count.jsonl\n", + "Generated JSONL file with - 1470 max words, 75 samples - at ./dataset/gen-word-1470-count.jsonl\n", + "Generated JSONL file with - 1500 max words, 75 samples - at ./dataset/gen-word-1500-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", + "Generated a single JSONL file with 241 samples (75 token repeat) - 830 max words - at ./dataset/shuffle-word-830-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", + "Generated JSONL file with - 1600 max words, 75 samples - at ./dataset/gen-word-1600-count.jsonl\n", + "Generated JSONL file with - 1530 max words, 75 samples - at ./dataset/gen-word-1530-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n", + "Generated JSONL file with - 1660 max words, 75 samples - at ./dataset/gen-word-1660-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n", + "Generated JSONL file with - 1540 max words, 75 samples - at ./dataset/gen-word-1540-count.jsonl\n", + "Generated 
JSONL file with - 1690 max words, 75 samples - at ./dataset/gen-word-1690-count.jsonl\n", + "Generated JSONL file with - 1510 max words, 75 samples - at ./dataset/gen-word-1510-count.jsonl\n", + "Generated JSONL file with - 1590 max words, 75 samples - at ./dataset/gen-word-1590-count.jsonl\n", + "Generated JSONL file with - 1550 max words, 75 samples - at ./dataset/gen-word-1550-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n", + "Generated JSONL file with - 1740 max words, 75 samples - at ./dataset/gen-word-1740-count.jsonl\n", + "Generated JSONL file with - 1580 max words, 75 samples - at ./dataset/gen-word-1580-count.jsonl\n", + "Generated JSONL file with - 1670 max words, 75 samples - at ./dataset/gen-word-1670-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n", + "Generated JSONL file with - 1620 max words, 75 samples - at ./dataset/gen-word-1620-count.jsonl\n", + "Generated JSONL file with - 1760 max words, 75 samples - at ./dataset/gen-word-1760-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n", + "Generated JSONL file with - 1640 max words, 75 samples - at ./dataset/gen-word-1640-count.jsonl\n", + "Generated JSONL file with - 1790 max words, 75 samples - at ./dataset/gen-word-1790-count.jsonl\n", + "Generated JSONL file with - 1630 max words, 75 samples - at ./dataset/gen-word-1630-count.jsonl\n", + "Generated JSONL file with - 1770 max words, 75 samples - at ./dataset/gen-word-1770-count.jsonl\n", + "Generated JSONL file with - 1840 max words, 75 samples - at ./dataset/gen-word-1840-count.jsonl\n", + "Generated JSONL file with - 1610 max words, 75 samples - at ./dataset/gen-word-1610-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n", + "Generated JSONL file with - 1800 max words, 75 samples - at ./dataset/gen-word-1800-count.jsonl\n", + "Generated JSONL file with - 1680 max words, 75 samples - at ./dataset/gen-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n", + "Generated JSONL file with - 1720 max words, 75 samples - at ./dataset/gen-word-1720-count.jsonl\n", + "Generated JSONL file with - 1570 max words, 75 samples - at ./dataset/gen-word-1570-count.jsonl\n", + 
"Generated a single JSONL file with 150 samples (75 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n", + "Generated JSONL file with - 1850 max words, 75 samples - at ./dataset/gen-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n", + "Generated JSONL file with - 1780 max words, 75 samples - at ./dataset/gen-word-1780-count.jsonl\n", + "Generated JSONL file with - 1820 max words, 75 samples - at ./dataset/gen-word-1820-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n", + "Generated JSONL file with - 1730 max words, 75 samples - at ./dataset/gen-word-1730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n", + "Generated JSONL file with - 1810 max words, 75 samples - at ./dataset/gen-word-1810-count.jsonl\n", + "Generated JSONL file with - 1710 max words, 75 samples - at ./dataset/gen-word-1710-count.jsonl\n", + "Generated JSONL file with - 1870 max words, 75 samples - at ./dataset/gen-word-1870-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n", + "Generated JSONL file with - 1880 max words, 75 samples - at ./dataset/gen-word-1880-count.jsonl\n", + "Generated JSONL file with - 1860 max words, 75 samples - at ./dataset/gen-word-1860-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n", + "Generated JSONL file with - 1830 max words, 75 samples - at ./dataset/gen-word-1830-count.jsonl\n", + "Generated JSONL file with - 2310 max words, 75 samples - at ./dataset/gen-word-2310-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonl\n", + "Generated JSONL file with - 1910 max words, 75 samples - at ./dataset/gen-word-1910-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1650 max words - at ./dataset/shuffle-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n", + "Generated JSONL file with - 1650 max words, 75 samples - at ./dataset/gen-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n", + "Generated JSONL file with - 1960 max words, 75 samples - at ./dataset/gen-word-1960-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1940 max words - at 
./dataset/shuffle-word-1940-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n", + "Generated JSONL file with - 1890 max words, 75 samples - at ./dataset/gen-word-1890-count.jsonl\n", + "Generated JSONL file with - 2050 max words, 75 samples - at ./dataset/gen-word-2050-count.jsonl\n", + "Generated JSONL file with - 2440 max words, 75 samples - at ./dataset/gen-word-2440-count.jsonl\n", + "Generated JSONL file with - 1900 max words, 75 samples - at ./dataset/gen-word-1900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2100 max words - at ./dataset/shuffle-word-2100-count.jsonl\n", + "Generated JSONL file with - 2480 max words, 75 samples - at ./dataset/gen-word-2480-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n", + "Generated JSONL file with - 1930 max words, 75 samples - at ./dataset/gen-word-1930-count.jsonl\n", + "Generated JSONL file with - 1940 max words, 75 samples - at ./dataset/gen-word-1940-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n", + "Generated JSONL file with - 2330 max words, 75 samples - at ./dataset/gen-word-2330-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n", + "Generated JSONL file with - 2170 max words, 75 samples - at ./dataset/gen-word-2170-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1910 max words - at ./dataset/shuffle-word-1910-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n", + "Generated JSONL file with - 2500 max words, 75 samples - at ./dataset/gen-word-2500-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n", + "Generated a single JSONL file with 86 samples (75 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n", + "Generated a single JSONL file with 139 samples (75 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n", + "Generated JSONL file with - 2580 max words, 75 samples - at ./dataset/gen-word-2580-count.jsonl\n", + "Generated JSONL file with - 1970 max words, 75 samples - at ./dataset/gen-word-1970-count.jsonl\n", + "Generated JSONL file with - 2000 max words, 75 samples - at ./dataset/gen-word-2000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words - at ./dataset/shuffle-word-3450-count.jsonl\n", + "Generated JSONL file with - 
1920 max words, 75 samples - at ./dataset/gen-word-1920-count.jsonl\n", + "Generated JSONL file with - 1980 max words, 75 samples - at ./dataset/gen-word-1980-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5425 max words - at ./dataset/shuffle-word-5425-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonl\n", + "Generated JSONL file with - 1990 max words, 75 samples - at ./dataset/gen-word-1990-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1870 max words - at ./dataset/shuffle-word-1870-count.jsonl\n", + "Generated JSONL file with - 2820 max words, 75 samples - at ./dataset/gen-word-2820-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n", + "Generated JSONL file with - 3475 max words, 100 samples - at ./dataset/gen-word-3475-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n", + "Generated JSONL file with - 2010 max words, 75 samples - at ./dataset/gen-word-2010-count.jsonl\n", + "Generated JSONL file with - 4950 max words, 100 samples - at ./dataset/gen-word-4950-count.jsonl\n", + "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n", + "Generated JSONL file with - 2030 max words, 75 samples - at ./dataset/gen-word-2030-count.jsonl\n", + "Generated a single JSONL file with 140 samples (75 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n", + "Generated JSONL file with - 2550 max words, 75 samples - at ./dataset/gen-word-2550-count.jsonl\n", + "Generated JSONL file with - 2020 max words, 75 samples - at ./dataset/gen-word-2020-count.jsonl\n", + "Generated JSONL file with - 2040 max words, 75 samples - at ./dataset/gen-word-2040-count.jsonl\n", + "Generated JSONL file with - 2590 max words, 75 samples - at ./dataset/gen-word-2590-count.jsonl\n", + "Generated JSONL file with - 2060 max words, 75 samples - at ./dataset/gen-word-2060-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n", + "Generated JSONL file with - 5825 max words, 100 samples - at ./dataset/gen-word-5825-count.jsonl\n", + "Generated JSONL file with - 2110 max words, 75 samples - at ./dataset/gen-word-2110-count.jsonl\n", + "Generated JSONL file with - 2140 max words, 75 samples - at ./dataset/gen-word-2140-count.jsonl\n", + "Generated JSONL file with - 2080 max words, 75 samples - at ./dataset/gen-word-2080-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2090 max words - at 
./dataset/shuffle-word-2090-count.jsonl\n", + "Generated JSONL file with - 2570 max words, 75 samples - at ./dataset/gen-word-2570-count.jsonl\n", + "Generated JSONL file with - 4250 max words, 100 samples - at ./dataset/gen-word-4250-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2140 max words - at ./dataset/shuffle-word-2140-count.jsonl\n", + "Generated a single JSONL file with 79 samples (75 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonl\n", + "Generated JSONL file with - 2220 max words, 75 samples - at ./dataset/gen-word-2220-count.jsonl\n", + "Generated JSONL file with - 2120 max words, 75 samples - at ./dataset/gen-word-2120-count.jsonl\n", + "Generated JSONL file with - 2190 max words, 75 samples - at ./dataset/gen-word-2190-count.jsonl\n", + "Generated JSONL file with - 2180 max words, 75 samples - at ./dataset/gen-word-2180-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n", + "Generated JSONL file with - 2230 max words, 75 samples - at ./dataset/gen-word-2230-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n", + "Generated JSONL file with - 2090 max words, 75 samples - at ./dataset/gen-word-2090-count.jsonl\n", + "Generated JSONL file with - 2270 max words, 75 samples - at ./dataset/gen-word-2270-count.jsonl\n", + "Generated JSONL file with - 2290 max words, 75 samples - at ./dataset/gen-word-2290-count.jsonl\n", + "Generated JSONL file with - 2260 max words, 75 samples - at ./dataset/gen-word-2260-count.jsonl\n", + "Generated JSONL file with - 2200 max words, 75 samples - at ./dataset/gen-word-2200-count.jsonl\n", + "Generated JSONL file with - 2130 max words, 75 samples - at ./dataset/gen-word-2130-count.jsonl\n", + "Generated JSONL file with - 2250 max words, 75 samples - at ./dataset/gen-word-2250-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2280 max words - at ./dataset/shuffle-word-2280-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2000 max words - at ./dataset/shuffle-word-2000-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 
token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2330 max words - at ./dataset/shuffle-word-2330-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n", + "Generated JSONL file with - 2300 max words, 75 samples - at ./dataset/gen-word-2300-count.jsonl\n", + "Generated JSONL file with - 2150 max words, 75 samples - at ./dataset/gen-word-2150-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonl\n", + "Generated JSONL file with - 2240 max words, 75 samples - at ./dataset/gen-word-2240-count.jsonl\n", + "Generated JSONL file with - 2160 max words, 75 samples - at ./dataset/gen-word-2160-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n", + "Generated a single JSONL file with 133 samples (75 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n", + "Generated JSONL file with - 4200 max words, 100 samples - at ./dataset/gen-word-4200-count.jsonl\n", + "Generated JSONL file with - 2280 max words, 75 samples - at ./dataset/gen-word-2280-count.jsonl\n", + "Generated JSONL file with - 2070 max words, 75 samples - at ./dataset/gen-word-2070-count.jsonl\n", + "Generated JSONL file with - 2360 max words, 75 samples - at ./dataset/gen-word-2360-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n", + "Generated JSONL file with - 2420 max words, 75 samples - at ./dataset/gen-word-2420-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2400 max words - at ./dataset/shuffle-word-2400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n", + "Generated JSONL file with - 2390 max words, 75 samples - at ./dataset/gen-word-2390-count.jsonl\n", + "Generated JSONL file with - 2470 max words, 75 samples - at ./dataset/gen-word-2470-count.jsonl\n", + "Generated a single JSONL file with 141 samples (75 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n", + "Generated a single JSONL file with 116 samples (75 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n", + "Generated JSONL file with - 2520 max words, 75 samples - at ./dataset/gen-word-2520-count.jsonl\n", + "Generated a single JSONL file with 142 samples (75 token repeat) - 2500 max words - at ./dataset/shuffle-word-2500-count.jsonl\n", + "Generated JSONL file with - 2510 max words, 75 samples - at 
./dataset/gen-word-2510-count.jsonl\n", + "Generated JSONL file with - 2450 max words, 75 samples - at ./dataset/gen-word-2450-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n", + "Generated a single JSONL file with 116 samples (75 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n", + "Generated a single JSONL file with 115 samples (75 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n", + "Generated a single JSONL file with 107 samples (75 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n", + "Generated a single JSONL file with 115 samples (75 token repeat) - 2520 max words - at ./dataset/shuffle-word-2520-count.jsonl\n", + "Generated a single JSONL file with 119 samples (75 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n", + "Generated a single JSONL file with 112 samples (75 token repeat) - 2600 max words - at ./dataset/shuffle-word-2600-count.jsonl\n", + "Generated a single JSONL file with 85 samples (75 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n", + "Generated JSONL file with - 2600 max words, 75 samples - at ./dataset/gen-word-2600-count.jsonl\n", + "Generated JSONL file with - 2540 max words, 75 samples - at ./dataset/gen-word-2540-count.jsonl\n", + "Generated JSONL file with - 2700 max words, 75 samples - at ./dataset/gen-word-2700-count.jsonl\n", + "Generated a single JSONL file with 111 samples (75 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n", + "Generated a single JSONL file with 139 samples (75 token repeat) - 2450 max words - at ./dataset/shuffle-word-2450-count.jsonl\n", + "Generated JSONL file with - 2340 max words, 75 samples - at ./dataset/gen-word-2340-count.jsonl\n", + "Generated JSONL file with - 2530 max words, 75 samples - at ./dataset/gen-word-2530-count.jsonl\n", + "Generated JSONL file with - 2400 max words, 75 samples - at ./dataset/gen-word-2400-count.jsonl\n", + "Generated a single JSONL file with 113 samples (75 token repeat) - 2560 max words - at ./dataset/shuffle-word-2560-count.jsonl\n", + "Generated JSONL file with - 2620 max words, 75 samples - at ./dataset/gen-word-2620-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n", + "Generated JSONL file with - 2610 max words, 75 samples - at ./dataset/gen-word-2610-count.jsonl\n", + "Generated JSONL file with - 2490 max words, 75 samples - at ./dataset/gen-word-2490-count.jsonl\n", + "Generated a single JSONL file with 84 samples (75 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n", + "Generated JSONL file with - 5475 max words, 100 samples - at ./dataset/gen-word-5475-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n", + "Generated JSONL file with - 2690 max words, 75 samples - at ./dataset/gen-word-2690-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n", + "Generated JSONL file with - 2370 max words, 75 samples - at ./dataset/gen-word-2370-count.jsonl\n", + "Generated JSONL file with - 2210 max words, 75 samples - at ./dataset/gen-word-2210-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token 
repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n", + "Generated JSONL file with - 2560 max words, 75 samples - at ./dataset/gen-word-2560-count.jsonl\n", + "Generated JSONL file with - 2640 max words, 75 samples - at ./dataset/gen-word-2640-count.jsonl\n", + "Generated JSONL file with - 2730 max words, 75 samples - at ./dataset/gen-word-2730-count.jsonl\n", + "Generated JSONL file with - 2350 max words, 75 samples - at ./dataset/gen-word-2350-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n", + "Generated JSONL file with - 2670 max words, 75 samples - at ./dataset/gen-word-2670-count.jsonl\n", + "Generated JSONL file with - 2680 max words, 75 samples - at ./dataset/gen-word-2680-count.jsonl\n", + "Generated JSONL file with - 2380 max words, 75 samples - at ./dataset/gen-word-2380-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n", + "Generated JSONL file with - 2710 max words, 75 samples - at ./dataset/gen-word-2710-count.jsonl\n", + "Generated a single JSONL file with 91 samples (75 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n", + "Generated JSONL file with - 2320 max words, 75 samples - at ./dataset/gen-word-2320-count.jsonl\n", + "Generated JSONL file with - 2720 max words, 75 samples - at ./dataset/gen-word-2720-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n", + "Generated JSONL file with - 3575 max words, 100 samples - at ./dataset/gen-word-3575-count.jsonl\n", + "Generated JSONL file with - 2770 max words, 75 samples - at ./dataset/gen-word-2770-count.jsonl\n", + "Generated JSONL file with - 2460 max words, 75 samples - at ./dataset/gen-word-2460-count.jsonl\n", + "Generated JSONL file with - 2760 max words, 75 samples - at ./dataset/gen-word-2760-count.jsonl\n", + "Generated JSONL file with - 2740 max words, 75 samples - at ./dataset/gen-word-2740-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n", + "Generated JSONL file with - 2430 max words, 75 samples - at ./dataset/gen-word-2430-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n", + "Generated JSONL file with - 2630 max words, 75 samples - at ./dataset/gen-word-2630-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n", + "Generated a single JSONL file with 142 samples (75 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4150 max words - at ./dataset/shuffle-word-4150-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n", + "Generated JSONL file with - 2750 max words, 75 samples - at ./dataset/gen-word-2750-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2770 max words - at ./dataset/shuffle-word-2770-count.jsonl\n", + "Generated JSONL file 
with - 2790 max words, 75 samples - at ./dataset/gen-word-2790-count.jsonl\n", + "Generated JSONL file with - 2410 max words, 75 samples - at ./dataset/gen-word-2410-count.jsonl\n", + "Generated JSONL file with - 2800 max words, 75 samples - at ./dataset/gen-word-2800-count.jsonl\n", + "Generated JSONL file with - 2780 max words, 75 samples - at ./dataset/gen-word-2780-count.jsonl\n", + "Generated JSONL file with - 2830 max words, 75 samples - at ./dataset/gen-word-2830-count.jsonl\n", + "Generated a single JSONL file with 141 samples (75 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n", + "Generated a single JSONL file with 92 samples (75 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n", + "Generated JSONL file with - 2850 max words, 75 samples - at ./dataset/gen-word-2850-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n", + "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n", + "Generated JSONL file with - 2840 max words, 75 samples - at ./dataset/gen-word-2840-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n", + "Generated JSONL file with - 2910 max words, 75 samples - at ./dataset/gen-word-2910-count.jsonl\n", + "Generated JSONL file with - 2870 max words, 75 samples - at ./dataset/gen-word-2870-count.jsonl\n", + "Generated JSONL file with - 2880 max words, 75 samples - at ./dataset/gen-word-2880-count.jsonl\n", + "Generated JSONL file with - 2930 max words, 75 samples - at ./dataset/gen-word-2930-count.jsonl\n", + "Generated JSONL file with - 2920 max words, 75 samples - at ./dataset/gen-word-2920-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n", + "Generated JSONL file with - 2960 max words, 75 samples - at ./dataset/gen-word-2960-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n", + "Generated JSONL file with - 2950 max words, 75 samples - at ./dataset/gen-word-2950-count.jsonl\n", + "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n", + "Generated JSONL file with - 2980 max words, 75 samples - at ./dataset/gen-word-2980-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n", + "Generated JSONL file with - 2890 max words, 75 samples - at ./dataset/gen-word-2890-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4050 max words - at ./dataset/shuffle-word-4050-count.jsonl\n", + "Generated JSONL file with - 2940 max words, 75 samples - at ./dataset/gen-word-2940-count.jsonl\n", + "Generated JSONL file with - 2990 max words, 75 samples - at ./dataset/gen-word-2990-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n", + "Generated JSONL file with - 2810 max words, 75 samples - at ./dataset/gen-word-2810-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3000 max 
words - at ./dataset/shuffle-word-3000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4750 max words - at ./dataset/shuffle-word-4750-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n", + "Generated a single JSONL file with 87 samples (75 token repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n", + "Generated JSONL file with - 2860 max words, 75 samples - at ./dataset/gen-word-2860-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n", + "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5575 max words - at ./dataset/shuffle-word-5575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4325 max words - at ./dataset/shuffle-word-4325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", + "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", + "Generated JSONL file with - 3000 max words, 100 samples - at ./dataset/gen-word-3000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3125 max words - at ./dataset/shuffle-word-3125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", + "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", + "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n", + "Generated JSONL file with - 3100 max words, 100 samples - at ./dataset/gen-word-3100-count.jsonl\n", + "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max words - at ./dataset/shuffle-word-3475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", + "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", + 
"Generated a single JSONL file with 75 samples (75 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5825 max words - at ./dataset/shuffle-word-5825-count.jsonl\n", + "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", + "Generated JSONL file with - 3125 max words, 100 samples - at ./dataset/gen-word-3125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 max words - at ./dataset/shuffle-word-3350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", + "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", + "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", + "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3800 max words - at ./dataset/shuffle-word-3800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at ./dataset/shuffle-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n", + "Generated JSONL file with - 3600 max words, 100 samples - at ./dataset/gen-word-3600-count.jsonl\n", + "Generated JSONL file with - 3775 max words, 100 samples - at ./dataset/gen-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at ./dataset/shuffle-word-3650-count.jsonl\n", + "Generated JSONL file with - 3200 max words, 100 samples - at ./dataset/gen-word-3200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3850 max words - at ./dataset/shuffle-word-3850-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2890 max words - at ./dataset/shuffle-word-2890-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", + "Generated a single JSONL file 
with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n", + "Generated JSONL file with - 2900 max words, 75 samples - at ./dataset/gen-word-2900-count.jsonl\n", + "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n", + "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3625 max words - at ./dataset/shuffle-word-3625-count.jsonl\n", + "Generated JSONL file with - 5650 max words, 100 samples - at ./dataset/gen-word-5650-count.jsonl\n", + "Generated JSONL file with - 3075 max words, 100 samples - at ./dataset/gen-word-3075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at ./dataset/shuffle-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n", + "Generated JSONL file with - 4425 max words, 100 samples - at ./dataset/gen-word-4425-count.jsonl\n", + "Generated JSONL file with - 3725 max words, 100 samples - at ./dataset/gen-word-3725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5375 max words - at ./dataset/shuffle-word-5375-count.jsonl\n", + "Generated JSONL file with - 3000 max words, 75 samples - at ./dataset/gen-word-3000-count.jsonl\n", + "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", + "Generated JSONL file with - 3925 max words, 100 samples - at ./dataset/gen-word-3925-count.jsonl\n", + "Generated JSONL file with - 5250 max words, 100 samples - at ./dataset/gen-word-5250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n", + "Generated JSONL file with - 3875 max words, 100 samples - at ./dataset/gen-word-3875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4225 max words - at ./dataset/shuffle-word-4225-count.jsonl\n", + "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", + "Generated JSONL file with - 3850 max words, 100 samples - at ./dataset/gen-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4175 max words - at ./dataset/shuffle-word-4175-count.jsonl\n", + "Generated JSONL file with - 5350 max words, 100 samples - at ./dataset/gen-word-5350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4025 max words - at ./dataset/shuffle-word-4025-count.jsonl\n", + "Generated JSONL file with - 3900 max words, 100 samples - at ./dataset/gen-word-3900-count.jsonl\n", + "Generated a single JSONL file 
with 100 samples (100 token repeat) - 4250 max words - at ./dataset/shuffle-word-4250-count.jsonl\n", + "Generated JSONL file with - 3675 max words, 100 samples - at ./dataset/gen-word-3675-count.jsonl\n", + "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4400 max words - at ./dataset/shuffle-word-4400-count.jsonl\n", + "Generated JSONL file with - 3650 max words, 100 samples - at ./dataset/gen-word-3650-count.jsonl\n", + "Generated JSONL file with - 3225 max words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4700 max words - at ./dataset/shuffle-word-4700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4375 max words - at ./dataset/shuffle-word-4375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4275 max words - at ./dataset/shuffle-word-4275-count.jsonl\n", + "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4075 max words - at ./dataset/shuffle-word-4075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4475 max words - at ./dataset/shuffle-word-4475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4525 max words - at ./dataset/shuffle-word-4525-count.jsonl\n", + "Generated JSONL file with - 4025 max words, 100 samples - at ./dataset/gen-word-4025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4350 max words - at ./dataset/shuffle-word-4350-count.jsonl\n", + "Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", + "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4125 max words - at ./dataset/shuffle-word-4125-count.jsonl\n", + "Generated JSONL file with - 3975 max words, 100 samples - at ./dataset/gen-word-3975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4100 max words - at ./dataset/shuffle-word-4100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4550 max words - at ./dataset/shuffle-word-4550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4200 max words - at ./dataset/shuffle-word-4200-count.jsonl\n", + "Generated JSONL file with - 5175 max words, 100 samples - at ./dataset/gen-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4725 max words - at ./dataset/shuffle-word-4725-count.jsonl\n", + "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4625 max words - at ./dataset/shuffle-word-4625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4500 max words - at ./dataset/shuffle-word-4500-count.jsonl\n", + "Generated 
a single JSONL file with 100 samples (100 token repeat) - 4450 max words - at ./dataset/shuffle-word-4450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5150 max words - at ./dataset/shuffle-word-5150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4675 max words - at ./dataset/shuffle-word-4675-count.jsonl\n", + "Generated JSONL file with - 4150 max words, 100 samples - at ./dataset/gen-word-4150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4600 max words - at ./dataset/shuffle-word-4600-count.jsonl\n", + "Generated JSONL file with - 4500 max words, 100 samples - at ./dataset/gen-word-4500-count.jsonl\n", + "Generated JSONL file with - 4175 max words, 100 samples - at ./dataset/gen-word-4175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4825 max words - at ./dataset/shuffle-word-4825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5350 max words - at ./dataset/shuffle-word-5350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5475 max words - at ./dataset/shuffle-word-5475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5025 max words - at ./dataset/shuffle-word-5025-count.jsonl\n", + "Generated JSONL file with - 4450 max words, 100 samples - at ./dataset/gen-word-4450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4575 max words - at ./dataset/shuffle-word-4575-count.jsonl\n", + "Generated JSONL file with - 4375 max words, 100 samples - at ./dataset/gen-word-4375-count.jsonl\n", + "Generated JSONL file with - 4350 max words, 100 samples - at ./dataset/gen-word-4350-count.jsonl\n", + "Generated JSONL file with - 4225 max words, 100 samples - at ./dataset/gen-word-4225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4950 max words - at ./dataset/shuffle-word-4950-count.jsonl\n", + "Generated JSONL file with - 5325 max words, 100 samples - at ./dataset/gen-word-5325-count.jsonl\n", + "Generated JSONL file with - 4575 max words, 100 samples - at ./dataset/gen-word-4575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5100 max words - at ./dataset/shuffle-word-5100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4300 max words - at ./dataset/shuffle-word-4300-count.jsonl\n", + "Generated JSONL file with - 4725 max words, 100 samples - at ./dataset/gen-word-4725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5175 max words - at ./dataset/shuffle-word-5175-count.jsonl\n", + "Generated JSONL file with - 4325 max words, 100 samples - at ./dataset/gen-word-4325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4425 max words - at ./dataset/shuffle-word-4425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5275 max words - at ./dataset/shuffle-word-5275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5525 max words - at ./dataset/shuffle-word-5525-count.jsonl\n", + "Generated JSONL file with - 5025 max words, 100 samples - at ./dataset/gen-word-5025-count.jsonl\n", + "Generated JSONL file with - 4550 max words, 100 samples - at ./dataset/gen-word-4550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 
token repeat) - 5325 max words - at ./dataset/shuffle-word-5325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5050 max words - at ./dataset/shuffle-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4925 max words - at ./dataset/shuffle-word-4925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5625 max words - at ./dataset/shuffle-word-5625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5450 max words - at ./dataset/shuffle-word-5450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5550 max words - at ./dataset/shuffle-word-5550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4775 max words - at ./dataset/shuffle-word-4775-count.jsonl\n", + "Generated JSONL file with - 4625 max words, 100 samples - at ./dataset/gen-word-4625-count.jsonl\n", + "Generated JSONL file with - 4650 max words, 100 samples - at ./dataset/gen-word-4650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5125 max words - at ./dataset/shuffle-word-5125-count.jsonl\n", + "Generated JSONL file with - 4125 max words, 100 samples - at ./dataset/gen-word-4125-count.jsonl\n", + "Generated JSONL file with - 4075 max words, 100 samples - at ./dataset/gen-word-4075-count.jsonl\n", + "Generated JSONL file with - 4100 max words, 100 samples - at ./dataset/gen-word-4100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4875 max words - at ./dataset/shuffle-word-4875-count.jsonl\n", + "Generated JSONL file with - 4775 max words, 100 samples - at ./dataset/gen-word-4775-count.jsonl\n", + "Generated JSONL file with - 4275 max words, 100 samples - at ./dataset/gen-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4975 max words - at ./dataset/shuffle-word-4975-count.jsonl\n", + "Generated JSONL file with - 4475 max words, 100 samples - at ./dataset/gen-word-4475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5250 max words - at ./dataset/shuffle-word-5250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5225 max words - at ./dataset/shuffle-word-5225-count.jsonl\n", + "Generated JSONL file with - 4525 max words, 100 samples - at ./dataset/gen-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5725 max words - at ./dataset/shuffle-word-5725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5075 max words - at ./dataset/shuffle-word-5075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4900 max words - at ./dataset/shuffle-word-4900-count.jsonl\n", + "Generated JSONL file with - 4750 max words, 100 samples - at ./dataset/gen-word-4750-count.jsonl\n", + "Generated JSONL file with - 5075 max words, 100 samples - at ./dataset/gen-word-5075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 6000 max words - at ./dataset/shuffle-word-6000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5900 max words - at ./dataset/shuffle-word-5900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5650 max words - at ./dataset/shuffle-word-5650-count.jsonl\n", + "Generated a single JSONL file 
with 100 samples (100 token repeat) - 4850 max words - at ./dataset/shuffle-word-4850-count.jsonl\n", + "Generated JSONL file with - 4700 max words, 100 samples - at ./dataset/gen-word-4700-count.jsonl\n", + "Generated JSONL file with - 4300 max words, 100 samples - at ./dataset/gen-word-4300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5775 max words - at ./dataset/shuffle-word-5775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5500 max words - at ./dataset/shuffle-word-5500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5200 max words - at ./dataset/shuffle-word-5200-count.jsonl\n", + "Generated JSONL file with - 4825 max words, 100 samples - at ./dataset/gen-word-4825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5400 max words - at ./dataset/shuffle-word-5400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5700 max words - at ./dataset/shuffle-word-5700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5975 max words - at ./dataset/shuffle-word-5975-count.jsonl\n", + "Generated JSONL file with - 4600 max words, 100 samples - at ./dataset/gen-word-4600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5300 max words - at ./dataset/shuffle-word-5300-count.jsonl\n", + "Generated JSONL file with - 4975 max words, 100 samples - at ./dataset/gen-word-4975-count.jsonl\n", + "Generated JSONL file with - 4400 max words, 100 samples - at ./dataset/gen-word-4400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5800 max words - at ./dataset/shuffle-word-5800-count.jsonl\n", + "Generated JSONL file with - 5225 max words, 100 samples - at ./dataset/gen-word-5225-count.jsonl\n", + "Generated JSONL file with - 4900 max words, 100 samples - at ./dataset/gen-word-4900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5000 max words - at ./dataset/shuffle-word-5000-count.jsonl\n", + "Generated JSONL file with - 5375 max words, 100 samples - at ./dataset/gen-word-5375-count.jsonl\n", + "Generated JSONL file with - 5125 max words, 100 samples - at ./dataset/gen-word-5125-count.jsonl\n", + "Generated JSONL file with - 5050 max words, 100 samples - at ./dataset/gen-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5600 max words - at ./dataset/shuffle-word-5600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5875 max words - at ./dataset/shuffle-word-5875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5925 max words - at ./dataset/shuffle-word-5925-count.jsonl\n", + "Generated JSONL file with - 5600 max words, 100 samples - at ./dataset/gen-word-5600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5750 max words - at ./dataset/shuffle-word-5750-count.jsonl\n", + "Generated JSONL file with - 5275 max words, 100 samples - at ./dataset/gen-word-5275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5950 max words - at ./dataset/shuffle-word-5950-count.jsonl\n", + "Generated JSONL file with - 5300 max words, 100 samples - at ./dataset/gen-word-5300-count.jsonl\n", + "Generated JSONL file with - 4875 max words, 100 samples - at ./dataset/gen-word-4875-count.jsonl\n", + 
"Generated JSONL file with - 5675 max words, 100 samples - at ./dataset/gen-word-5675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5850 max words - at ./dataset/shuffle-word-5850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4800 max words - at ./dataset/shuffle-word-4800-count.jsonl\n", + "Generated JSONL file with - 5100 max words, 100 samples - at ./dataset/gen-word-5100-count.jsonl\n", + "Generated JSONL file with - 5925 max words, 100 samples - at ./dataset/gen-word-5925-count.jsonl\n", + "Generated JSONL file with - 5400 max words, 100 samples - at ./dataset/gen-word-5400-count.jsonl\n", + "Generated JSONL file with - 5850 max words, 100 samples - at ./dataset/gen-word-5850-count.jsonl\n", + "Generated JSONL file with - 5000 max words, 100 samples - at ./dataset/gen-word-5000-count.jsonl\n", + "Generated JSONL file with - 5700 max words, 100 samples - at ./dataset/gen-word-5700-count.jsonl\n", + "Generated JSONL file with - 5450 max words, 100 samples - at ./dataset/gen-word-5450-count.jsonl\n", + "Generated JSONL file with - 4800 max words, 100 samples - at ./dataset/gen-word-4800-count.jsonl\n", + "Generated JSONL file with - 5200 max words, 100 samples - at ./dataset/gen-word-5200-count.jsonl\n", + "Generated JSONL file with - 5625 max words, 100 samples - at ./dataset/gen-word-5625-count.jsonl\n", + "Generated JSONL file with - 5875 max words, 100 samples - at ./dataset/gen-word-5875-count.jsonl\n", + "Generated JSONL file with - 5800 max words, 100 samples - at ./dataset/gen-word-5800-count.jsonl\n", + "Generated JSONL file with - 5900 max words, 100 samples - at ./dataset/gen-word-5900-count.jsonl\n", + "Generated JSONL file with - 5150 max words, 100 samples - at ./dataset/gen-word-5150-count.jsonl\n", + "Generated JSONL file with - 6000 max words, 100 samples - at ./dataset/gen-word-6000-count.jsonl\n", + "Generated JSONL file with - 5575 max words, 100 samples - at ./dataset/gen-word-5575-count.jsonl\n", + "Generated JSONL file with - 5775 max words, 100 samples - at ./dataset/gen-word-5775-count.jsonl\n", + "Generated JSONL file with - 5725 max words, 100 samples - at ./dataset/gen-word-5725-count.jsonl\n", + "Generated JSONL file with - 4925 max words, 100 samples - at ./dataset/gen-word-4925-count.jsonl\n", + "Generated JSONL file with - 5525 max words, 100 samples - at ./dataset/gen-word-5525-count.jsonl\n", + "Generated JSONL file with - 5425 max words, 100 samples - at ./dataset/gen-word-5425-count.jsonl\n", + "Generated JSONL file with - 5950 max words, 100 samples - at ./dataset/gen-word-5950-count.jsonl\n", + "Generated JSONL file with - 5750 max words, 100 samples - at ./dataset/gen-word-5750-count.jsonl\n", + "Generated JSONL file with - 5500 max words, 100 samples - at ./dataset/gen-word-5500-count.jsonl\n", + "Generated JSONL file with - 5975 max words, 100 samples - at ./dataset/gen-word-5975-count.jsonl\n", + "## Done ##\n", + "total 1.8G\n", + "drwxr-xr-x 2 root root 40K Jan 23 18:27 .\n", + "drwxr-xr-x 5 root root 4.0K Jan 23 13:12 ..\n", + "-rw-r--r-- 1 root root 20K Jan 23 18:27 gen-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 105K Jan 23 18:27 gen-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 739K Jan 23 18:27 gen-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 753K Jan 23 18:27 gen-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 754K Jan 23 18:27 gen-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 749K Jan 23 18:27 gen-word-1030-count.jsonl\n", + 
"-rw-r--r-- 1 root root 765K Jan 23 18:27 gen-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 770K Jan 23 18:27 gen-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 784K Jan 23 18:27 gen-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 779K Jan 23 18:27 gen-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 791K Jan 23 18:27 gen-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 805K Jan 23 18:27 gen-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 90K Jan 23 18:27 gen-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 812K Jan 23 18:27 gen-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 823K Jan 23 18:27 gen-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 832K Jan 23 18:27 gen-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 835K Jan 23 18:27 gen-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 840K Jan 23 18:27 gen-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 848K Jan 23 18:27 gen-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 855K Jan 23 18:27 gen-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 861K Jan 23 18:27 gen-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 872K Jan 23 18:27 gen-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 879K Jan 23 18:27 gen-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 92K Jan 23 18:27 gen-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 892K Jan 23 18:27 gen-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 899K Jan 23 18:27 gen-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 886K Jan 23 18:27 gen-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 898K Jan 23 18:27 gen-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 911K Jan 23 18:27 gen-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 914K Jan 23 18:27 gen-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 925K Jan 23 18:27 gen-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 937K Jan 23 18:27 gen-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 949K Jan 23 18:27 gen-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 955K Jan 23 18:27 gen-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 103K Jan 23 18:27 gen-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 953K Jan 23 18:27 gen-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 953K Jan 23 18:27 gen-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 965K Jan 23 18:27 gen-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 979K Jan 23 18:27 gen-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 989K Jan 23 18:27 gen-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 986K Jan 23 18:27 gen-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 990K Jan 23 18:27 gen-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 1017K Jan 23 18:27 gen-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1000K Jan 23 18:27 gen-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 111K Jan 23 18:27 gen-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1470-count.jsonl\n", + 
"-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 25K Jan 23 18:27 gen-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 115K Jan 23 18:27 gen-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 18:27 gen-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 123K Jan 23 18:27 gen-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 18:27 gen-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 133K Jan 23 18:27 gen-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 142K Jan 23 18:27 gen-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 18:27 gen-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 147K Jan 23 18:27 gen-word-190-count.jsonl\n", + 
"-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 18:27 gen-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 30K Jan 23 18:27 gen-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 152K Jan 23 18:27 gen-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 18:27 gen-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 160K Jan 23 18:27 gen-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 166K Jan 23 18:27 gen-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 18:27 gen-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 177K Jan 23 18:27 gen-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2320-count.jsonl\n", + 
"-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 18:27 gen-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 181K Jan 23 18:27 gen-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 34K Jan 23 18:27 gen-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 193K Jan 23 18:27 gen-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 18:27 gen-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 204K Jan 23 18:27 gen-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 gen-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 201K Jan 23 18:27 gen-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2750-count.jsonl\n", + 
"-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 214K Jan 23 18:27 gen-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 gen-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 220K Jan 23 18:27 gen-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 gen-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 40K Jan 23 18:27 gen-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 226K Jan 23 18:27 gen-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 18:27 gen-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 gen-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 gen-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 gen-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 234K Jan 23 18:27 gen-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 gen-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 gen-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 238K Jan 23 18:27 gen-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 gen-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 247K Jan 23 18:27 gen-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 18:27 gen-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 18:27 gen-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 18:27 gen-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 18:27 gen-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 256K Jan 23 18:27 gen-word-340-count.jsonl\n", + "-rw-r--r-- 
1 root root 3.3M Jan 23 18:27 gen-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 18:27 gen-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 18:27 gen-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 18:27 gen-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 44K Jan 23 18:27 gen-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 262K Jan 23 18:27 gen-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 18:27 gen-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 18:27 gen-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 269K Jan 23 18:27 gen-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 gen-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 277K Jan 23 18:27 gen-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 18:27 gen-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 18:27 gen-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 18:27 gen-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 283K Jan 23 18:27 gen-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 292K Jan 23 18:27 gen-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 18:27 gen-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 18:27 gen-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 18:27 gen-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 18:27 gen-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 48K Jan 23 18:27 gen-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 297K Jan 23 18:27 gen-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 18:27 gen-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 18:27 gen-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 18:27 gen-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 18:27 gen-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 313K Jan 23 18:27 gen-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 315K Jan 23 18:27 gen-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 18:27 gen-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 18:27 gen-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 18:27 gen-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 324K Jan 23 18:27 gen-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 18:27 gen-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M 
Jan 23 18:27 gen-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 18:27 gen-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 18:27 gen-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 324K Jan 23 18:27 gen-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 18:27 gen-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 18:27 gen-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 18:27 gen-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 18:27 gen-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 54K Jan 23 18:27 gen-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 340K Jan 23 18:27 gen-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 18:27 gen-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 348K Jan 23 18:27 gen-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 18:27 gen-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 18:27 gen-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 18:27 gen-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 346K Jan 23 18:27 gen-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 18:27 gen-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 18:27 gen-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 18:27 gen-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 18:27 gen-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 352K Jan 23 18:27 gen-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 18:27 gen-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 18:27 gen-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 369K Jan 23 18:27 gen-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 18:27 gen-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 18:27 gen-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 18:27 gen-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 57K Jan 23 18:27 gen-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 375K Jan 23 18:27 gen-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 18:27 gen-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 18:27 gen-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 18:27 gen-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 18:27 gen-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 378K Jan 23 18:27 gen-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 18:27 gen-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 18:27 gen-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 18:27 gen-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 gen-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 388K Jan 23 18:27 gen-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 gen-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 
gen-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 gen-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 gen-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 394K Jan 23 18:27 gen-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 18:27 gen-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 18:27 gen-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 18:27 gen-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 18:27 gen-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 404K Jan 23 18:27 gen-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 18:27 gen-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 18:27 gen-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 18:27 gen-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 18:27 gen-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 62K Jan 23 18:27 gen-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 404K Jan 23 18:27 gen-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 18:27 gen-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 18:27 gen-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 18:27 gen-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 18:27 gen-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 415K Jan 23 18:27 gen-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 18:27 gen-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 18:27 gen-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 18:27 gen-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 18:27 gen-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 425K Jan 23 18:27 gen-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 18:27 gen-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 18:27 gen-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 18:27 gen-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 433K Jan 23 18:27 gen-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 18:27 gen-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 438K Jan 23 18:27 gen-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 gen-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 gen-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 gen-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 gen-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 69K Jan 23 18:27 gen-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 447K Jan 23 18:27 gen-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 18:27 gen-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 448K Jan 23 18:27 gen-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 456K Jan 23 18:27 gen-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 471K Jan 23 18:27 gen-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 468K Jan 23 18:27 gen-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 73K Jan 23 18:27 gen-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 489K Jan 23 18:27 gen-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 481K Jan 23 18:27 
gen-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 496K Jan 23 18:27 gen-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 511K Jan 23 18:27 gen-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 510K Jan 23 18:27 gen-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 77K Jan 23 18:27 gen-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 523K Jan 23 18:27 gen-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 531K Jan 23 18:27 gen-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 534K Jan 23 18:27 gen-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 544K Jan 23 18:27 gen-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 546K Jan 23 18:27 gen-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 83K Jan 23 18:27 gen-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 548K Jan 23 18:27 gen-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 564K Jan 23 18:27 gen-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 576K Jan 23 18:27 gen-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 582K Jan 23 18:27 gen-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 588K Jan 23 18:27 gen-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 87K Jan 23 18:27 gen-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 589K Jan 23 18:27 gen-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 598K Jan 23 18:27 gen-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 608K Jan 23 18:27 gen-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 616K Jan 23 18:27 gen-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 626K Jan 23 18:27 gen-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 94K Jan 23 18:27 gen-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 623K Jan 23 18:27 gen-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 640K Jan 23 18:27 gen-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 637K Jan 23 18:27 gen-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 651K Jan 23 18:27 gen-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 660K Jan 23 18:27 gen-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 96K Jan 23 18:27 gen-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 663K Jan 23 18:27 gen-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 673K Jan 23 18:27 gen-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 675K Jan 23 18:27 gen-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 688K Jan 23 18:27 gen-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 693K Jan 23 18:27 gen-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 103K Jan 23 18:27 gen-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 700K Jan 23 18:27 gen-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 713K Jan 23 18:27 gen-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 723K Jan 23 18:27 gen-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 726K Jan 23 18:27 gen-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 727K Jan 23 18:27 gen-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 18:27 shuffle-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 18:27 shuffle-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 
shuffle-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 shuffle-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 shuffle-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 shuffle-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1470-count.jsonl\n", + "-rw-r--r-- 
1 root root 2.0M Jan 23 18:27 shuffle-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 18:27 shuffle-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 18:27 shuffle-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 
shuffle-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 18:27 shuffle-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2280-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.0M Jan 23 18:27 shuffle-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 18:27 shuffle-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 
shuffle-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 18:27 shuffle-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 18:27 shuffle-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-320-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.5M Jan 23 18:27 shuffle-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 18:27 shuffle-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 18:27 shuffle-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 
shuffle-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 shuffle-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root 
root 2.5M Jan 23 18:27 shuffle-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 8.0M Jan 23 18:27 shuffle-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 18:27 shuffle-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 shuffle-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5800-count.jsonl\n", + 
"-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 18:27 shuffle-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 18:27 shuffle-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 18:27 shuffle-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 18:27 shuffle-word-90-count.jsonl\n", + "-rw-r--r-- 
1 root root 2.0M Jan 23 18:27 shuffle-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 18:27 shuffle-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 18:27 shuffle-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 12K Jan 23 18:27 word-2-count.jsonl\n", + "-rw-r--r-- 1 root root 14K Jan 23 18:27 word-4-count.jsonl\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word repetition dataset ##\"\n", + "\n", + "#\n", + "# Training set for <= 100 words\n", + "# This is used to fill up as many blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100 to 3000 words dataset\n", + "# \n", + "for i in {110..3000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 75 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 75 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 3000 to 6000 words dataset\n", + "# \n", + "for i in {3025..6000..25} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fa2c8f0f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resolving data files: 100%|███████████████| 862/862 [00:00<00:00, 141224.56it/s]\n", + "Generating train split: 312299 examples [00:08, 38514.98 examples/s] \n", + "Map (num_proc=96): 100%|███████| 312299/312299 [01:05<00:00, 4748.29 examples/s]\n", + "Filter (num_proc=96): 100%|███| 312299/312299 [00:28<00:00, 11020.85 examples/s]\n", + "Map (num_proc=96): 100%|██████| 307944/307944 [00:06<00:00, 50786.71 examples/s]\n", + "Map (num_proc=96): 100%|███████| 307944/307944 [00:41<00:00, 7509.42 examples/s]\n", + "Map (num_proc=96): 100%|█████████| 36875/36875 [00:13<00:00, 
2686.82 examples/s]\n", + "Saving the dataset (4/4 shards): 100%|█| 36875/36875 [00:01<00:00, 23914.80 exam\n", + "Saving the dataset (1/1 shards): 100%|█| 1548/1548 [00:00<00:00, 35414.86 exampl\n" + ] + } + ], + "source": [ + "# Let's pre-tokenize the required dataset\n", + "# and pack the data into 8k length chunks\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# than to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "60244472", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : The actual tune!" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4025b7b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-23 18:30:56,182] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=4', '--model.ctx_len=8192'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-1B5-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=4', '--model.ctx_len=8192'].\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 3725758400\n", + "Seed set to 3725758400\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 256\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 4\n", + " - accumulate_grad_batches: 8\n", + " - effective_batch_size: 256\n", + "\n", + "[rank: 0] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-23 18:31:17,933] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,934] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,934] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,936] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,936] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,937] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 18:31:17,937] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 2] Seed set to 3725758400\n", + "[rank: 3] Seed set to 3725758400\n", + "[rank: 5] Seed set to 3725758400\n", + "[rank: 4] Seed set to 3725758400\n", + "[rank: 1] Seed set to 3725758400\n", + "[rank: 7] Seed set to 3725758400\n", + "[rank: 6] Seed set to 3725758400\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 7] Seed set to 3725758400\n", + "[rank: 6] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 1] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 3] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 4] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 2] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 5] Seed set to 3725758400\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "Enabling DeepSpeed BF16. 
Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240123_183155-ss3js48a\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8xA100] RWKV-v5-1B5-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/ss3js48a\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 2.000e-04 (0.0002)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.013622045516967773 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...Loading extension module fused_adam...\n", + "\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...Loading extension module fused_adam...\n", + "\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.101654052734375 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10231280326843262 seconds\n", + "Time to load fused_adam op: 0.10230112075805664 secondsTime to load fused_adam op: 0.10238885879516602 seconds\n", + "\n", + "Time to load fused_adam op: 0.10254812240600586 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10338902473449707 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10484576225280762 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "Epoch 0: 17%|▏| 200/1153 [12:26<59:16, 0.27it/s, v_num=s48a, train/loss=0.013]/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|█| 1153/1153 [1:04:42<00:00, 0.30it/s, v_num=s48a, train/loss=2.4\n", + "Validation: | | 0/? 
[00:00 This project assumes you have the rwkv-infctx conda env setup, and you are executing in that environment - see the main README.md for the conda env setup steps" + ] + }, + { + "cell_type": "markdown", + "id": "0a0d2888", + "metadata": { + "papermill": { + "duration": 0.004809, + "end_time": "2024-01-23T11:31:12.276770", + "exception": false, + "start_time": "2024-01-23T11:31:12.271961", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Configure your environment settings\n", + "(!Important: you will need to rerun the cell below if you restart your kernel)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e8b16f4a", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:12.286314Z", + "iopub.status.busy": "2024-01-23T11:31:12.285325Z", + "iopub.status.idle": "2024-01-23T11:31:12.303712Z", + "shell.execute_reply": "2024-01-23T11:31:12.302559Z" + }, + "papermill": { + "duration": 0.024597, + "end_time": "2024-01-23T11:31:12.306334", + "exception": false, + "start_time": "2024-01-23T11:31:12.281737", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "WANDB_PREFIX=\"[8xA100] RWKV-v5-3B-World\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# The model sizing\n", + "MODEL_NAME=\"RWKV-v5-3B-world.pth\"\n", + "MODEL_URL=\"https://huggingface.co/BlinkDL/rwkv-5-world/resolve/main/RWKV-5-World-3B-v2-20231118-ctx16k.pth?download=true\"\n", + "\n", + "# Computing the notebook and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "MEMORY_SCRIPT_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./notebook/util-scripts/memory_script\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "markdown", + "id": "dc0dcc53", + "metadata": { + "papermill": { + "duration": 0.003834, + "end_time": "2024-01-23T11:31:12.316887", + "exception": false, + "start_time": "2024-01-23T11:31:12.313053", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Download the pretrained model\n", + "(if you want to skip the basemodel train + instruct tune)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db947b68", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:12.326806Z", + "iopub.status.busy": "2024-01-23T11:31:12.325636Z", + "iopub.status.idle": "2024-01-23T11:31:12.823116Z", + "shell.execute_reply": "2024-01-23T11:31:12.821743Z" + }, + "papermill": { + "duration": 0.505449, + "end_time": "2024-01-23T11:31:12.826085", + "exception": false, + "start_time": 
"2024-01-23T11:31:12.320636", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's wget the model files\n", + "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n", + "!cd \"{PROJECT_DIR}/model\" && \\\n", + " wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e8c15352", + "metadata": { + "papermill": { + "duration": 0.005467, + "end_time": "2024-01-23T11:31:12.837257", + "exception": false, + "start_time": "2024-01-23T11:31:12.831790", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 2x2k) : Dataset preparation\n", + "\n", + "Stage 1 handles a total context size of 2048, meaning it will be tuned for memory tasks of 1 to approximately 1024 tokens in size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bfca27", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:12.843769Z", + "iopub.status.busy": "2024-01-23T11:31:12.843411Z", + "iopub.status.idle": "2024-01-23T11:31:16.588281Z", + "shell.execute_reply": "2024-01-23T11:31:16.587130Z" + }, + "papermill": { + "duration": 3.751098, + "end_time": "2024-01-23T11:31:16.591186", + "exception": false, + "start_time": "2024-01-23T11:31:12.840088", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Folder and eval pip setup\n", + "!cp -r \"{MEMORY_SCRIPT_DIR}/\" \"{NOTEBOOK_DIR}/\"\n", + "!python3 -m pip install rwkv asyncio aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c38e51c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:16.604643Z", + "iopub.status.busy": "2024-01-23T11:31:16.604064Z", + "iopub.status.idle": "2024-01-23T11:31:19.825147Z", + "shell.execute_reply": "2024-01-23T11:31:19.823826Z" + }, + "papermill": { + "duration": 3.231491, + "end_time": "2024-01-23T11:31:19.828814", + "exception": false, + "start_time": "2024-01-23T11:31:16.597323", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word repetition dataset ##\"\n", + "\n", + "#\n", + "# Training set for < 100 words\n", + "# This is used to fill up as many blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 150 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100 to 200 words dataset\n", + "# \n", + "for i in {110..200..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 125 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 200 to 4000 words dataset\n", + "# \n", + "for i in 
{210..4000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0577a12c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:20.066235Z", + "iopub.status.busy": "2024-01-23T11:31:20.065803Z", + "iopub.status.idle": "2024-01-23T11:34:10.695875Z", + "shell.execute_reply": "2024-01-23T11:34:10.694270Z" + }, + "papermill": { + "duration": 170.715947, + "end_time": "2024-01-23T11:34:10.699529", + "exception": false, + "start_time": "2024-01-23T11:31:19.983582", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's pre-tokenize the required dataset\n", + "# and pack the data into 8k length chunks\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# than to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-1-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-1-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "82e837d9", + "metadata": { + "papermill": { + "duration": 0.113925, + "end_time": "2024-01-23T11:34:10.936645", + "exception": false, + "start_time": "2024-01-23T11:34:10.822720", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 2x2k) : The actual tune!\n",
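+ "\n", + "Before launching the run below, it helps to sanity check the batch math: the bs=256 in the run name is the target batch size, which gets split across the GPUs and the --trainer.microbatch_size override, with gradient accumulation making up the remainder (the [RWKV.Trainer] 'target_batch_size' block in the training logs prints the same derivation). A minimal sketch of that arithmetic, assuming the 8 GPUs implied by the [8xA100] run prefix; the helper below is illustrative only, not part of the trainer:\n", + "\n",
+ "```python\n", + "# Rough sketch (illustrative only) of how accumulate_grad_batches is derived\n", + "def accumulate_grad_batches(target_batch_size, num_devices, microbatch_size):\n", + "    return max(1, target_batch_size // (num_devices * microbatch_size))\n", + "\n", + "# 8 GPUs x microbatch 8 -> 4 accumulation steps; 8 GPUs x microbatch 4 -> 8 steps\n", + "print(accumulate_grad_batches(256, 8, 8), accumulate_grad_batches(256, 8, 4))\n", + "```"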
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "127af572", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:34:11.167985Z", + "iopub.status.busy": "2024-01-23T11:34:11.167540Z", + "iopub.status.idle": "2024-01-23T13:03:10.590814Z", + "shell.execute_reply": "2024-01-23T13:03:10.588629Z" + }, + "papermill": { + "duration": 5339.542389, + "end_time": "2024-01-23T13:03:10.594019", + "exception": false, + "start_time": "2024-01-23T11:34:11.051630", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-1-tune.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-1 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=8 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81395227", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:03:10.911324Z", + "iopub.status.busy": "2024-01-23T13:03:10.910769Z", + "iopub.status.idle": "2024-01-23T13:03:39.572566Z", + "shell.execute_reply": "2024-01-23T13:03:39.571025Z" + }, + "papermill": { + "duration": 28.823213, + "end_time": "2024-01-23T13:03:39.575536", + "exception": false, + "start_time": "2024-01-23T13:03:10.752323", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "595a1b18", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:05:37.852892Z", + "iopub.status.busy": "2024-01-23T13:05:37.852266Z", + "iopub.status.idle": "2024-01-23T13:12:24.206962Z", + "shell.execute_reply": "2024-01-23T13:12:24.205778Z" + }, + "papermill": { + "duration": 406.525339, + "end_time": "2024-01-23T13:12:24.209784", + "exception": false, + "start_time": "2024-01-23T13:05:37.684445", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\" \"none\" 1000 3000" + ] + }, + { + "cell_type": "markdown", + "id": "bca9abd4", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : Dataset preparation\n", + "\n", + "Stage 2 handles a total context size of 8k, meaning it will be tuned for memory tasks of approximately 4k tokens in size.\n",
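+ "\n", + "The memory_script generators used above are not included in this diff, so the following is a rough sketch only: the field names, prompt wording, word list, and output path are assumptions for illustration, not the scripts' actual output. The idea is that each JSONL line pairs a randomized word sequence prompt with a completion that repeats it back:\n", + "\n",
+ "```python\n", + "# Hypothetical sketch of a word-repetition record (NOT the actual memory_script generator)\n", + "import json, random\n", + "\n", + "WORDS = [\"apple\", \"river\", \"stone\", \"cloud\", \"lantern\", \"violet\", \"ember\", \"quill\"]\n", + "\n", + "def make_record(word_count):\n", + "    words = \" \".join(random.choice(WORDS) for _ in range(word_count))\n", + "    return {\"prompt\": f\"Memorise and repeat the following text: {words}\", \"completion\": words}\n", + "\n", + "with open(\"./dataset/example-word-10-count.jsonl\", \"w\") as handle:\n", + "    for _ in range(100):\n", + "        print(json.dumps(make_record(10)), file=handle)\n", + "```"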
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8a4da53a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n", + "Generated JSONL file with - 5 max words, 100 samples - at ./dataset/gen-word-5-count.jsonl\n", + "Generated JSONL file with - 2 max words, 100 samples - at ./dataset/word-2-count.jsonl\n", + "Generated JSONL file with - 25 max words, 100 samples - at ./dataset/gen-word-25-count.jsonl\n", + "Generated JSONL file with - 45 max words, 100 samples - at ./dataset/gen-word-45-count.jsonl\n", + "Generated JSONL file with - 4 max words, 100 samples - at ./dataset/word-4-count.jsonl\n", + "Generated JSONL file with - 10 max words, 100 samples - at ./dataset/gen-word-10-count.jsonl\n", + "Generated JSONL file with - 15 max words, 100 samples - at ./dataset/gen-word-15-count.jsonl\n", + "Generated JSONL file with - 20 max words, 100 samples - at ./dataset/gen-word-20-count.jsonl\n", + "Generated JSONL file with - 40 max words, 100 samples - at ./dataset/gen-word-40-count.jsonl\n", + "Generated JSONL file with - 30 max words, 100 samples - at ./dataset/gen-word-30-count.jsonl\n", + "Generated JSONL file with - 70 max words, 100 samples - at ./dataset/gen-word-70-count.jsonl\n", + "Generated JSONL file with - 35 max words, 100 samples - at ./dataset/gen-word-35-count.jsonl\n", + "Generated JSONL file with - 50 max words, 100 samples - at ./dataset/gen-word-50-count.jsonl\n", + "Generated JSONL file with - 60 max words, 100 samples - at ./dataset/gen-word-60-count.jsonl\n", + "Generated JSONL file with - 90 max words, 100 samples - at ./dataset/gen-word-90-count.jsonl\n", + "Generated JSONL file with - 80 max words, 100 samples - at ./dataset/gen-word-80-count.jsonl\n", + "Generated JSONL file with - 85 max words, 100 samples - at ./dataset/gen-word-85-count.jsonl\n", + "Generated JSONL file with - 120 max words, 75 samples - at ./dataset/gen-word-120-count.jsonl\n", + "Generated JSONL file with - 65 max words, 100 samples - at ./dataset/gen-word-65-count.jsonl\n", + "Generated JSONL file with - 75 max words, 100 samples - at ./dataset/gen-word-75-count.jsonl\n", + "Generated JSONL file with - 55 max words, 100 samples - at ./dataset/gen-word-55-count.jsonl\n", + "Generated JSONL file with - 230 max words, 75 samples - at ./dataset/gen-word-230-count.jsonl\n", + "Generated JSONL file with - 360 max words, 75 samples - at ./dataset/gen-word-360-count.jsonl\n", + "Generated JSONL file with - 200 max words, 75 samples - at ./dataset/gen-word-200-count.jsonl\n", + "Generated JSONL file with - 260 max words, 75 samples - at ./dataset/gen-word-260-count.jsonl\n", + "Generated JSONL file with - 140 max words, 75 samples - at ./dataset/gen-word-140-count.jsonl\n", + "Generated JSONL file with - 110 max words, 75 samples - at ./dataset/gen-word-110-count.jsonl\n", + "Generated JSONL file with - 270 max words, 75 samples - at ./dataset/gen-word-270-count.jsonl\n", + "Generated JSONL file with - 460 max words, 75 samples - at ./dataset/gen-word-460-count.jsonl\n", + "Generated JSONL file with - 400 max words, 75 samples - at ./dataset/gen-word-400-count.jsonl\n", + "Generated JSONL file with - 300 max words, 75 samples - at ./dataset/gen-word-300-count.jsonl\n", + "Generated JSONL file with - 95 max words, 100 samples - at ./dataset/gen-word-95-count.jsonl\n", + "Generated JSONL file with - 280 max words, 75 samples - at ./dataset/gen-word-280-count.jsonl\n", + "Generated JSONL file with - 
150 max words, 75 samples - at ./dataset/gen-word-150-count.jsonl\n", + "Generated JSONL file with - 160 max words, 75 samples - at ./dataset/gen-word-160-count.jsonl\n", + "Generated JSONL file with - 240 max words, 75 samples - at ./dataset/gen-word-240-count.jsonlGenerated JSONL file with - 190 max words, 75 samples - at ./dataset/gen-word-190-count.jsonl\n", + "\n", + "Generated JSONL file with - 580 max words, 75 samples - at ./dataset/gen-word-580-count.jsonl\n", + "Generated JSONL file with - 290 max words, 75 samples - at ./dataset/gen-word-290-count.jsonl\n", + "Generated JSONL file with - 210 max words, 75 samples - at ./dataset/gen-word-210-count.jsonl\n", + "Generated JSONL file with - 250 max words, 75 samples - at ./dataset/gen-word-250-count.jsonl\n", + "Generated JSONL file with - 130 max words, 75 samples - at ./dataset/gen-word-130-count.jsonl\n", + "Generated JSONL file with - 410 max words, 75 samples - at ./dataset/gen-word-410-count.jsonl\n", + "Generated JSONL file with - 180 max words, 75 samples - at ./dataset/gen-word-180-count.jsonl\n", + "Generated JSONL file with - 320 max words, 75 samples - at ./dataset/gen-word-320-count.jsonl\n", + "Generated JSONL file with - 170 max words, 75 samples - at ./dataset/gen-word-170-count.jsonl\n", + "Generated JSONL file with - 220 max words, 75 samples - at ./dataset/gen-word-220-count.jsonl\n", + "Generated JSONL file with - 370 max words, 75 samples - at ./dataset/gen-word-370-count.jsonl\n", + "Generated JSONL file with - 100 max words, 100 samples - at ./dataset/gen-word-100-count.jsonl\n", + "Generated JSONL file with - 450 max words, 75 samples - at ./dataset/gen-word-450-count.jsonl\n", + "Generated JSONL file with - 500 max words, 75 samples - at ./dataset/gen-word-500-count.jsonl\n", + "Generated JSONL file with - 310 max words, 75 samples - at ./dataset/gen-word-310-count.jsonl\n", + "Generated JSONL file with - 430 max words, 75 samples - at ./dataset/gen-word-430-count.jsonl\n", + "Generated JSONL file with - 420 max words, 75 samples - at ./dataset/gen-word-420-count.jsonl\n", + "Generated JSONL file with - 380 max words, 75 samples - at ./dataset/gen-word-380-count.jsonl\n", + "Generated JSONL file with - 530 max words, 75 samples - at ./dataset/gen-word-530-count.jsonl\n", + "Generated JSONL file with - 350 max words, 75 samples - at ./dataset/gen-word-350-count.jsonl\n", + "Generated JSONL file with - 510 max words, 75 samples - at ./dataset/gen-word-510-count.jsonl\n", + "Generated JSONL file with - 340 max words, 75 samples - at ./dataset/gen-word-340-count.jsonl\n", + "Generated JSONL file with - 560 max words, 75 samples - at ./dataset/gen-word-560-count.jsonl\n", + "Generated JSONL file with - 550 max words, 75 samples - at ./dataset/gen-word-550-count.jsonl\n", + "Generated JSONL file with - 330 max words, 75 samples - at ./dataset/gen-word-330-count.jsonl\n", + "Generated JSONL file with - 490 max words, 75 samples - at ./dataset/gen-word-490-count.jsonl\n", + "Generated JSONL file with - 470 max words, 75 samples - at ./dataset/gen-word-470-count.jsonl\n", + "Generated JSONL file with - 520 max words, 75 samples - at ./dataset/gen-word-520-count.jsonl\n", + "Generated JSONL file with - 440 max words, 75 samples - at ./dataset/gen-word-440-count.jsonl\n", + "Generated JSONL file with - 390 max words, 75 samples - at ./dataset/gen-word-390-count.jsonl\n", + "Generated JSONL file with - 480 max words, 75 samples - at ./dataset/gen-word-480-count.jsonl\n", + "Generated JSONL file with - 570 max words, 75 
samples - at ./dataset/gen-word-570-count.jsonl\n", + "Generated JSONL file with - 680 max words, 75 samples - at ./dataset/gen-word-680-count.jsonl\n", + "Generated a single JSONL file with 680 samples (75 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", + "Generated a single JSONL file with 694 samples (75 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", + "Generated a single JSONL file with 527 samples (75 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", + "Generated JSONL file with - 590 max words, 75 samples - at ./dataset/gen-word-590-count.jsonl\n", + "Generated a single JSONL file with 685 samples (75 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", + "Generated JSONL file with - 610 max words, 75 samples - at ./dataset/gen-word-610-count.jsonl\n", + "Generated a single JSONL file with 532 samples (75 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", + "Generated a single JSONL file with 1005 samples (75 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", + "Generated a single JSONL file with 1034 samples (75 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", + "Generated a single JSONL file with 681 samples (75 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", + "Generated a single JSONL file with 523 samples (75 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", + "Generated a single JSONL file with 436 samples (75 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", + "Generated JSONL file with - 640 max words, 75 samples - at ./dataset/gen-word-640-count.jsonl\n", + "Generated a single JSONL file with 444 samples (75 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonlGenerated a single JSONL file with 789 samples (75 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", + "\n", + "Generated a single JSONL file with 305 samples (75 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonlGenerated a single JSONL file with 373 samples (75 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", + "Generated a single JSONL file with 686 samples (75 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", + "\n", + "Generated a single JSONL file with 699 samples (75 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", + "Generated JSONL file with - 690 max words, 75 samples - at ./dataset/gen-word-690-count.jsonl\n", + "Generated a single JSONL file with 1153 samples (75 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", + "Generated a single JSONL file with 534 samples (75 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", + "Generated a single JSONL file with 443 samples (75 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", + "Generated a single JSONL file with 4083 samples (100 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonlGenerated a single JSONL file with 375 samples (75 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", + "\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 390 max words - at 
./dataset/shuffle-word-390-count.jsonl\n", + "Generated a single JSONL file with 432 samples (75 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", + "Generated a single JSONL file with 1491 samples (75 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", + "Generated a single JSONL file with 447 samples (75 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", + "Generated a single JSONL file with 1018 samples (75 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", + "Generated a single JSONL file with 724 samples (75 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", + "Generated a single JSONL file with 1287 samples (75 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", + "Generated a single JSONL file with 1098 samples (75 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", + "Generated a single JSONL file with 2650 samples (100 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", + "Generated JSONL file with - 1670 max words, 75 samples - at ./dataset/gen-word-1670-count.jsonl\n", + "Generated a single JSONL file with 1227 samples (75 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", + "Generated a single JSONL file with 3543 samples (100 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", + "Generated a single JSONL file with 2916 samples (100 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", + "Generated a single JSONL file with 439 samples (75 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", + "Generated JSONL file with - 1470 max words, 75 samples - at ./dataset/gen-word-1470-count.jsonl\n", + "Generated a single JSONL file with 4794 samples (100 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", + "Generated a single JSONL file with 1056 samples (75 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", + "Generated JSONL file with - 800 max words, 75 samples - at ./dataset/gen-word-800-count.jsonl\n", + "Generated JSONL file with - 630 max words, 75 samples - at ./dataset/gen-word-630-count.jsonl\n", + "Generated JSONL file with - 600 max words, 75 samples - at ./dataset/gen-word-600-count.jsonl\n", + "Generated a single JSONL file with 522 samples (75 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", + "Generated a single JSONL file with 3773 samples (100 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", + "Generated a single JSONL file with 3298 samples (100 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", + "Generated a single JSONL file with 3145 samples (100 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", + "Generated a single JSONL file with 525 samples (75 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", + "Generated JSONL file with - 620 max words, 75 samples - at 
./dataset/gen-word-620-count.jsonl\n", + "Generated JSONL file with - 650 max words, 75 samples - at ./dataset/gen-word-650-count.jsonl\n", + "Generated a single JSONL file with 5860 samples (100 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", + "Generated a single JSONL file with 4379 samples (100 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", + "Generated a single JSONL file with 1382 samples (75 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", + "Generated a single JSONL file with 755 samples (75 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", + "Generated a single JSONL file with 747 samples (75 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", + "Generated JSONL file with - 540 max words, 75 samples - at ./dataset/gen-word-540-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", + "Generated a single JSONL file with 5255 samples (100 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", + "Generated a single JSONL file with 442 samples (75 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", + "Generated JSONL file with - 960 max words, 75 samples - at ./dataset/gen-word-960-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", + "Generated a single JSONL file with 435 samples (75 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", + "Generated JSONL file with - 1420 max words, 75 samples - at ./dataset/gen-word-1420-count.jsonl\n", + "Generated JSONL file with - 910 max words, 75 samples - at ./dataset/gen-word-910-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", + "Generated a single JSONL file with 6581 samples (100 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", + "Generated JSONL file with - 670 max words, 75 samples - at ./dataset/gen-word-670-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", + "Generated JSONL file with - 660 max words, 75 samples - at ./dataset/gen-word-660-count.jsonl\n", + "Generated a single JSONL file with 2798 samples (100 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n", + "Generated JSONL file with - 740 max words, 75 samples - at ./dataset/gen-word-740-count.jsonl\n", + "Generated JSONL file with - 990 max words, 75 samples - at ./dataset/gen-word-990-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", + "Generated a single JSONL file with 8727 samples (100 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 600 max words - at 
./dataset/shuffle-word-600-count.jsonl\n", + "Generated JSONL file with - 710 max words, 75 samples - at ./dataset/gen-word-710-count.jsonl\n", + "Generated JSONL file with - 950 max words, 75 samples - at ./dataset/gen-word-950-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", + "Generated JSONL file with - 1060 max words, 75 samples - at ./dataset/gen-word-1060-count.jsonl\n", + "Generated JSONL file with - 970 max words, 75 samples - at ./dataset/gen-word-970-count.jsonl\n", + "Generated a single JSONL file with 7513 samples (100 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", + "Generated JSONL file with - 980 max words, 75 samples - at ./dataset/gen-word-980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n", + "Generated JSONL file with - 1030 max words, 75 samples - at ./dataset/gen-word-1030-count.jsonl\n", + "Generated JSONL file with - 1000 max words, 75 samples - at ./dataset/gen-word-1000-count.jsonl\n", + "Generated JSONL file with - 1020 max words, 75 samples - at ./dataset/gen-word-1020-count.jsonl\n", + "Generated JSONL file with - 700 max words, 75 samples - at ./dataset/gen-word-700-count.jsonl\n", + "Generated JSONL file with - 720 max words, 75 samples - at ./dataset/gen-word-720-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", + "Generated JSONL file with - 1010 max words, 75 samples - at ./dataset/gen-word-1010-count.jsonl\n", + "Generated JSONL file with - 730 max words, 75 samples - at ./dataset/gen-word-730-count.jsonl\n", + "Generated a single JSONL file with 444 samples (75 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", + "Generated a single JSONL file with 435 samples (75 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", + "Generated a single JSONL file with 10623 samples (100 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", + "Generated a single JSONL file with 13076 samples (100 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", + "Generated JSONL file with - 760 max words, 75 samples - at ./dataset/gen-word-760-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", + "Generated JSONL file with - 860 max words, 75 samples - at ./dataset/gen-word-860-count.jsonl\n", + "Generated JSONL file with - 940 max words, 75 samples - at ./dataset/gen-word-940-count.jsonl\n", + "Generated JSONL file with - 1880 max words, 75 samples - at ./dataset/gen-word-1880-count.jsonl\n", + "Generated a single JSONL 
file with 225 samples (75 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", + "Generated JSONL file with - 770 max words, 75 samples - at ./dataset/gen-word-770-count.jsonl\n", + "Generated a single JSONL file with 239 samples (75 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", + "Generated JSONL file with - 810 max words, 75 samples - at ./dataset/gen-word-810-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", + "Generated JSONL file with - 750 max words, 75 samples - at ./dataset/gen-word-750-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", + "Generated JSONL file with - 820 max words, 75 samples - at ./dataset/gen-word-820-count.jsonl\n", + "Generated JSONL file with - 1040 max words, 75 samples - at ./dataset/gen-word-1040-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonl\n", + "Generated JSONL file with - 840 max words, 75 samples - at ./dataset/gen-word-840-count.jsonl\n", + "Generated JSONL file with - 790 max words, 75 samples - at ./dataset/gen-word-790-count.jsonl\n", + "Generated a single JSONL file with 305 samples (75 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", + "Generated JSONL file with - 850 max words, 75 samples - at ./dataset/gen-word-850-count.jsonl\n", + "Generated JSONL file with - 1110 max words, 75 samples - at ./dataset/gen-word-1110-count.jsonl\n", + "Generated JSONL file with - 890 max words, 75 samples - at ./dataset/gen-word-890-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", + "Generated JSONL file with - 930 max words, 75 samples - at ./dataset/gen-word-930-count.jsonl\n", + "Generated JSONL file with - 1120 max words, 75 samples - at ./dataset/gen-word-1120-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", + "Generated JSONL file with - 870 max words, 75 samples - at ./dataset/gen-word-870-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", + "Generated JSONL file with - 1180 max words, 75 samples - at ./dataset/gen-word-1180-count.jsonl\n", + "Generated JSONL file with - 920 max words, 75 samples - at ./dataset/gen-word-920-count.jsonl\n", + "Generated JSONL file with - 780 max words, 75 samples - at 
./dataset/gen-word-780-count.jsonl\n", + "Generated a single JSONL file with 243 samples (75 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", + "Generated JSONL file with - 1640 max words, 75 samples - at ./dataset/gen-word-1640-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", + "Generated a single JSONL file with 241 samples (75 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", + "Generated JSONL file with - 830 max words, 75 samples - at ./dataset/gen-word-830-count.jsonl\n", + "Generated a single JSONL file with 17734 samples (100 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n", + "Generated JSONL file with - 1140 max words, 75 samples - at ./dataset/gen-word-1140-count.jsonl\n", + "Generated JSONL file with - 1150 max words, 75 samples - at ./dataset/gen-word-1150-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", + "Generated JSONL file with - 1070 max words, 75 samples - at ./dataset/gen-word-1070-count.jsonl\n", + "Generated a single JSONL file with 186 samples (75 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", + "Generated JSONL file with - 1700 max words, 75 samples - at ./dataset/gen-word-1700-count.jsonl\n", + "Generated JSONL file with - 2430 max words, 75 samples - at ./dataset/gen-word-2430-count.jsonl\n", + "Generated JSONL file with - 1200 max words, 75 samples - at ./dataset/gen-word-1200-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", + "Generated JSONL file with - 1360 max words, 75 samples - at ./dataset/gen-word-1360-count.jsonl\n", + "Generated JSONL file with - 1130 max words, 75 samples - at ./dataset/gen-word-1130-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 830 max words - at ./dataset/shuffle-word-830-count.jsonl\n", + "Generated JSONL file with - 2750 max words, 75 samples - at ./dataset/gen-word-2750-count.jsonl\n", + "Generated JSONL file with - 1230 max words, 75 samples - at ./dataset/gen-word-1230-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n", + "Generated a single JSONL file with 296 samples (75 token repeat) - 800 max words - at ./dataset/shuffle-word-800-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", + "Generated JSONL file 
with - 1860 max words, 75 samples - at ./dataset/gen-word-1860-count.jsonl\n", + "Generated JSONL file with - 1160 max words, 75 samples - at ./dataset/gen-word-1160-count.jsonl\n", + "Generated JSONL file with - 1320 max words, 75 samples - at ./dataset/gen-word-1320-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", + "Generated JSONL file with - 1930 max words, 75 samples - at ./dataset/gen-word-1930-count.jsonl\n", + "Generated JSONL file with - 880 max words, 75 samples - at ./dataset/gen-word-880-count.jsonl\n", + "Generated JSONL file with - 2360 max words, 75 samples - at ./dataset/gen-word-2360-count.jsonl\n", + "Generated a single JSONL file with 26117 samples (100 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", + "Generated JSONL file with - 1080 max words, 75 samples - at ./dataset/gen-word-1080-count.jsonl\n", + "Generated a single JSONL file with 94 samples (75 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n", + "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n", + "Generated JSONL file with - 1210 max words, 75 samples - at ./dataset/gen-word-1210-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n", + "Generated JSONL file with - 2300 max words, 75 samples - at ./dataset/gen-word-2300-count.jsonl\n", + "Generated JSONL file with - 1100 max words, 75 samples - at ./dataset/gen-word-1100-count.jsonl\n", + "Generated a single JSONL file with 221 samples (75 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n", + "Generated JSONL file with - 1220 max words, 75 samples - at ./dataset/gen-word-1220-count.jsonl\n", + "Generated a single JSONL file with 243 samples (75 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", + "Generated JSONL file with - 1170 max words, 75 samples - at ./dataset/gen-word-1170-count.jsonl\n", + "Generated JSONL file with - 1190 max words, 75 samples - at ./dataset/gen-word-1190-count.jsonl\n", + "Generated JSONL file with - 3900 max words, 100 samples - at ./dataset/gen-word-3900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n", + "Generated JSONL file with 
- 2530 max words, 75 samples - at ./dataset/gen-word-2530-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", + "Generated JSONL file with - 2970 max words, 75 samples - at ./dataset/gen-word-2970-count.jsonl\n", + "Generated JSONL file with - 900 max words, 75 samples - at ./dataset/gen-word-900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5350 max words - at ./dataset/shuffle-word-5350-count.jsonl\n", + "Generated a single JSONL file with 191 samples (75 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", + "Generated JSONL file with - 1050 max words, 75 samples - at ./dataset/gen-word-1050-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", + "Generated a single JSONL file with 223 samples (75 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", + "Generated JSONL file with - 1260 max words, 75 samples - at ./dataset/gen-word-1260-count.jsonl\n", + "Generated JSONL file with - 1280 max words, 75 samples - at ./dataset/gen-word-1280-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", + "Generated a single JSONL file with 196 samples (75 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", + "Generated JSONL file with - 1810 max words, 75 samples - at ./dataset/gen-word-1810-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", + "Generated JSONL file with - 1290 max words, 75 samples - at ./dataset/gen-word-1290-count.jsonl\n", + "Generated JSONL file with - 1250 max words, 75 samples - at ./dataset/gen-word-1250-count.jsonl\n", + "Generated JSONL file with - 1090 max words, 75 samples - at ./dataset/gen-word-1090-count.jsonl\n", + "Generated JSONL file with - 1310 max words, 75 samples - at ./dataset/gen-word-1310-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1190 max words - at ./dataset/shuffle-word-1190-count.jsonl\n", + "Generated JSONL file with - 1270 max words, 75 samples - at ./dataset/gen-word-1270-count.jsonl\n", + "Generated JSONL file with - 1240 max words, 75 samples - at ./dataset/gen-word-1240-count.jsonl\n", + "Generated JSONL file with - 1300 max words, 75 samples - at ./dataset/gen-word-1300-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", + "Generated JSONL file with - 1350 max words, 75 samples - at ./dataset/gen-word-1350-count.jsonl\n", + "Generated a single JSONL file with 186 samples (75 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", + "Generated a single JSONL file 
with 222 samples (75 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", + "Generated a single JSONL file with 193 samples (75 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", + "Generated JSONL file with - 1480 max words, 75 samples - at ./dataset/gen-word-1480-count.jsonl\n", + "Generated JSONL file with - 1340 max words, 75 samples - at ./dataset/gen-word-1340-count.jsonl\n", + "Generated a single JSONL file with 191 samples (75 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", + "Generated JSONL file with - 1370 max words, 75 samples - at ./dataset/gen-word-1370-count.jsonl\n", + "Generated JSONL file with - 1330 max words, 75 samples - at ./dataset/gen-word-1330-count.jsonl\n", + "Generated a single JSONL file with 178 samples (75 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", + "Generated JSONL file with - 1440 max words, 75 samples - at ./dataset/gen-word-1440-count.jsonl\n", + "Generated JSONL file with - 1380 max words, 75 samples - at ./dataset/gen-word-1380-count.jsonl\n", + "Generated a single JSONL file with 185 samples (75 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", + "Generated JSONL file with - 1390 max words, 75 samples - at ./dataset/gen-word-1390-count.jsonl\n", + "Generated JSONL file with - 1410 max words, 75 samples - at ./dataset/gen-word-1410-count.jsonl\n", + "Generated a single JSONL file with 187 samples (75 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", + "Generated JSONL file with - 1450 max words, 75 samples - at ./dataset/gen-word-1450-count.jsonl\n", + "Generated JSONL file with - 1460 max words, 75 samples - at ./dataset/gen-word-1460-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", + "Generated JSONL file with - 1510 max words, 75 samples - at ./dataset/gen-word-1510-count.jsonl\n", + "Generated JSONL file with - 4875 max words, 100 samples - at ./dataset/gen-word-4875-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", + "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", + "Generated JSONL file with - 1530 max words, 75 samples - at ./dataset/gen-word-1530-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", + "Generated JSONL file with - 1540 max words, 75 samples - at ./dataset/gen-word-1540-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", + "Generated a single JSONL 
file with 154 samples (75 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", + "Generated a single JSONL file with 192 samples (75 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", + "Generated JSONL file with - 1520 max words, 75 samples - at ./dataset/gen-word-1520-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", + "Generated JSONL file with - 1560 max words, 75 samples - at ./dataset/gen-word-1560-count.jsonl\n", + "Generated JSONL file with - 1550 max words, 75 samples - at ./dataset/gen-word-1550-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n", + "Generated a single JSONL file with 88 samples (75 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n", + "Generated JSONL file with - 1400 max words, 75 samples - at ./dataset/gen-word-1400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", + "Generated JSONL file with - 1430 max words, 75 samples - at ./dataset/gen-word-1430-count.jsonl\n", + "Generated JSONL file with - 1600 max words, 75 samples - at ./dataset/gen-word-1600-count.jsonl\n", + "Generated JSONL file with - 1730 max words, 75 samples - at ./dataset/gen-word-1730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", + "Generated JSONL file with - 1590 max words, 75 samples - at ./dataset/gen-word-1590-count.jsonl\n", + "Generated JSONL file with - 1500 max words, 75 samples - at ./dataset/gen-word-1500-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n", + "Generated a single JSONL file with 155 samples (75 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", + "Generated JSONL file with - 1490 max words, 75 samples - at ./dataset/gen-word-1490-count.jsonl\n", + "Generated JSONL file with - 1570 max words, 75 samples - at ./dataset/gen-word-1570-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1550 max words - at ./dataset/shuffle-word-1550-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", + "Generated JSONL file with - 1630 max words, 75 samples - at ./dataset/gen-word-1630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1910 max words - at ./dataset/shuffle-word-1910-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", + "Generated JSONL file with - 1950 max words, 75 samples - at 
./dataset/gen-word-1950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", + "Generated JSONL file with - 1610 max words, 75 samples - at ./dataset/gen-word-1610-count.jsonl\n", + "Generated JSONL file with - 1680 max words, 75 samples - at ./dataset/gen-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n", + "Generated JSONL file with - 2000 max words, 75 samples - at ./dataset/gen-word-2000-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n", + "Generated JSONL file with - 1650 max words, 75 samples - at ./dataset/gen-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n", + "Generated JSONL file with - 2100 max words, 75 samples - at ./dataset/gen-word-2100-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", + "Generated JSONL file with - 1580 max words, 75 samples - at ./dataset/gen-word-1580-count.jsonl\n", + "Generated JSONL file with - 1740 max words, 75 samples - at ./dataset/gen-word-1740-count.jsonl\n", + "Generated JSONL file with - 1690 max words, 75 samples - at ./dataset/gen-word-1690-count.jsonl\n", + "Generated JSONL file with - 2270 max words, 75 samples - at ./dataset/gen-word-2270-count.jsonl\n", + "Generated JSONL file with - 1660 max words, 75 samples - at ./dataset/gen-word-1660-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n", + "Generated JSONL file with - 1750 max words, 75 samples - at ./dataset/gen-word-1750-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n", + "Generated JSONL file with - 1720 max words, 75 samples - at ./dataset/gen-word-1720-count.jsonl\n", + "Generated JSONL file with - 1620 max words, 75 samples - at ./dataset/gen-word-1620-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", + "Generated JSONL file with - 1770 max words, 75 samples - at ./dataset/gen-word-1770-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n", + "Generated JSONL file with - 1780 max words, 75 samples - at ./dataset/gen-word-1780-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1650 max words - at 
./dataset/shuffle-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n", + "Generated a single JSONL file with 120 samples (75 token repeat) - 2520 max words - at ./dataset/shuffle-word-2520-count.jsonl\n", + "Generated JSONL file with - 2650 max words, 75 samples - at ./dataset/gen-word-2650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words - at ./dataset/shuffle-word-3450-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1930 max words - at ./dataset/shuffle-word-1930-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n", + "Generated JSONL file with - 2810 max words, 75 samples - at ./dataset/gen-word-2810-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n", + "Generated a single JSONL file with 55780 samples (100 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n", + "Generated JSONL file with - 1710 max words, 75 samples - at ./dataset/gen-word-1710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n", + "Generated JSONL file with - 2800 max words, 75 samples - at ./dataset/gen-word-2800-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2330 max words - at ./dataset/shuffle-word-2330-count.jsonl\n", + "Generated JSONL file with - 2990 max words, 75 samples - at ./dataset/gen-word-2990-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4175 max words - at ./dataset/shuffle-word-4175-count.jsonl\n", + "Generated JSONL file with - 1960 max words, 75 samples - at ./dataset/gen-word-1960-count.jsonl\n", + "Generated JSONL file with - 2740 max words, 75 samples - at ./dataset/gen-word-2740-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n", + "Generated JSONL file with - 2290 max words, 75 samples - at ./dataset/gen-word-2290-count.jsonl\n", + 
"Generated JSONL file with - 1800 max words, 75 samples - at ./dataset/gen-word-1800-count.jsonl\n", + "Generated JSONL file with - 1840 max words, 75 samples - at ./dataset/gen-word-1840-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", + "Generated JSONL file with - 1760 max words, 75 samples - at ./dataset/gen-word-1760-count.jsonl\n", + "Generated a single JSONL file with 92 samples (75 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n", + "Generated JSONL file with - 1820 max words, 75 samples - at ./dataset/gen-word-1820-count.jsonl\n", + "Generated JSONL file with - 1790 max words, 75 samples - at ./dataset/gen-word-1790-count.jsonl\n", + "Generated JSONL file with - 1830 max words, 75 samples - at ./dataset/gen-word-1830-count.jsonl\n", + "Generated JSONL file with - 1850 max words, 75 samples - at ./dataset/gen-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n", + "Generated JSONL file with - 1890 max words, 75 samples - at ./dataset/gen-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", + "Generated JSONL file with - 2940 max words, 75 samples - at ./dataset/gen-word-2940-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n", + "Generated JSONL file with - 1900 max words, 75 samples - at ./dataset/gen-word-1900-count.jsonl\n", + "Generated a single JSONL file with 81 samples (75 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n", + "Generated JSONL file with - 1910 max words, 75 samples - at ./dataset/gen-word-1910-count.jsonl\n", + "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4275 max words - at ./dataset/shuffle-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4800 max words - at ./dataset/shuffle-word-4800-count.jsonl\n", + "Generated JSONL file with - 1920 max words, 75 samples - at ./dataset/gen-word-1920-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n", + "Generated JSONL file with - 2030 max words, 75 samples - at ./dataset/gen-word-2030-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n", + "Generated JSONL file with - 2470 max words, 75 samples - at ./dataset/gen-word-2470-count.jsonl\n", 
+ "Generated JSONL file with - 2020 max words, 75 samples - at ./dataset/gen-word-2020-count.jsonl\n", + "Generated JSONL file with - 2180 max words, 75 samples - at ./dataset/gen-word-2180-count.jsonl\n", + "Generated JSONL file with - 1870 max words, 75 samples - at ./dataset/gen-word-1870-count.jsonl\n", + "Generated JSONL file with - 1970 max words, 75 samples - at ./dataset/gen-word-1970-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n", + "Generated JSONL file with - 2350 max words, 75 samples - at ./dataset/gen-word-2350-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n", + "Generated JSONL file with - 2010 max words, 75 samples - at ./dataset/gen-word-2010-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n", + "Generated JSONL file with - 1990 max words, 75 samples - at ./dataset/gen-word-1990-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n", + "Generated JSONL file with - 1940 max words, 75 samples - at ./dataset/gen-word-1940-count.jsonl\n", + "Generated JSONL file with - 4650 max words, 100 samples - at ./dataset/gen-word-4650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonl\n", + "Generated JSONL file with - 5100 max words, 100 samples - at ./dataset/gen-word-5100-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2000 max words - at ./dataset/shuffle-word-2000-count.jsonl\n", + "Generated JSONL file with - 2120 max words, 75 samples - at ./dataset/gen-word-2120-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n", + "Generated JSONL file with - 4300 max words, 100 samples - at ./dataset/gen-word-4300-count.jsonl\n", + "Generated JSONL file with - 2070 max words, 75 samples - at ./dataset/gen-word-2070-count.jsonl\n", + "Generated JSONL file with - 2080 max words, 75 samples - at ./dataset/gen-word-2080-count.jsonl\n", + "Generated JSONL file with - 2190 max words, 75 samples - at ./dataset/gen-word-2190-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1870 max words - at 
./dataset/shuffle-word-1870-count.jsonl\n", + "Generated JSONL file with - 2110 max words, 75 samples - at ./dataset/gen-word-2110-count.jsonl\n", + "Generated JSONL file with - 1980 max words, 75 samples - at ./dataset/gen-word-1980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2100 max words - at ./dataset/shuffle-word-2100-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2090 max words - at ./dataset/shuffle-word-2090-count.jsonl\n", + "Generated JSONL file with - 2060 max words, 75 samples - at ./dataset/gen-word-2060-count.jsonl\n", + "Generated JSONL file with - 2160 max words, 75 samples - at ./dataset/gen-word-2160-count.jsonl\n", + "Generated JSONL file with - 2040 max words, 75 samples - at ./dataset/gen-word-2040-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonl\n", + "Generated JSONL file with - 2170 max words, 75 samples - at ./dataset/gen-word-2170-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n", + "Generated JSONL file with - 4475 max words, 100 samples - at ./dataset/gen-word-4475-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2140 max words - at ./dataset/shuffle-word-2140-count.jsonl\n", + "Generated JSONL file with - 2250 max words, 75 samples - at ./dataset/gen-word-2250-count.jsonl\n", + "Generated JSONL file with - 2710 max words, 75 samples - at ./dataset/gen-word-2710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n", + "Generated JSONL file with - 2590 max words, 75 samples - at ./dataset/gen-word-2590-count.jsonl\n", + "Generated JSONL file with - 2280 max words, 75 samples - at ./dataset/gen-word-2280-count.jsonl\n", + "Generated JSONL file with - 2200 max words, 75 samples - at ./dataset/gen-word-2200-count.jsonl\n", + "Generated JSONL file with - 2130 max words, 75 samples - at ./dataset/gen-word-2130-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n", + "Generated JSONL file with - 2260 max words, 75 samples - at ./dataset/gen-word-2260-count.jsonl\n", + "Generated JSONL file with - 2340 max words, 75 samples - at ./dataset/gen-word-2340-count.jsonl\n", + "Generated JSONL file with - 2230 max words, 75 samples - at ./dataset/gen-word-2230-count.jsonl\n", + "Generated JSONL file with - 5200 max words, 100 samples - at ./dataset/gen-word-5200-count.jsonl\n", + "Generated JSONL file with - 2400 max words, 75 samples - at ./dataset/gen-word-2400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n", + "Generated JSONL file with - 2140 max words, 75 samples - at ./dataset/gen-word-2140-count.jsonl\n", + "Generated JSONL file with - 2580 max words, 75 samples - at 
./dataset/gen-word-2580-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2280 max words - at ./dataset/shuffle-word-2280-count.jsonl\n", + "Generated JSONL file with - 2450 max words, 75 samples - at ./dataset/gen-word-2450-count.jsonl\n", + "Generated a single JSONL file with 120 samples (75 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n", + "Generated JSONL file with - 2370 max words, 75 samples - at ./dataset/gen-word-2370-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonl\n", + "Generated JSONL file with - 2380 max words, 75 samples - at ./dataset/gen-word-2380-count.jsonl\n", + "Generated JSONL file with - 2220 max words, 75 samples - at ./dataset/gen-word-2220-count.jsonl\n", + "Generated JSONL file with - 2330 max words, 75 samples - at ./dataset/gen-word-2330-count.jsonl\n", + "Generated JSONL file with - 2240 max words, 75 samples - at ./dataset/gen-word-2240-count.jsonl\n", + "Generated a single JSONL file with 136 samples (75 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n", + "Generated JSONL file with - 2210 max words, 75 samples - at ./dataset/gen-word-2210-count.jsonl\n", + "Generated JSONL file with - 2310 max words, 75 samples - at ./dataset/gen-word-2310-count.jsonl\n", + "Generated JSONL file with - 2410 max words, 75 samples - at ./dataset/gen-word-2410-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n", + "Generated JSONL file with - 2150 max words, 75 samples - at ./dataset/gen-word-2150-count.jsonl\n", + "Generated JSONL file with - 2560 max words, 75 samples - at ./dataset/gen-word-2560-count.jsonl\n", + "Generated a single JSONL file with 147 samples (75 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n", + "Generated JSONL file with - 2320 max words, 75 samples - at ./dataset/gen-word-2320-count.jsonl\n", + "Generated a single JSONL file with 135 samples (75 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n", + "Generated JSONL file with - 2980 max words, 75 samples - at ./dataset/gen-word-2980-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2400 max words - at ./dataset/shuffle-word-2400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1940 max words - at ./dataset/shuffle-word-1940-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonl\n", + "Generated JSONL file with - 2390 max words, 75 samples - at ./dataset/gen-word-2390-count.jsonl\n", + "Generated JSONL file with - 2090 max words, 75 samples - at ./dataset/gen-word-2090-count.jsonl\n", + "Generated a single JSONL file with 138 samples (75 token repeat) - 2450 max words - at 
./dataset/shuffle-word-2450-count.jsonl\n", + "Generated JSONL file with - 2050 max words, 75 samples - at ./dataset/gen-word-2050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5825 max words - at ./dataset/shuffle-word-5825-count.jsonl\n", + "Generated a single JSONL file with 138 samples (75 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n", + "Generated JSONL file with - 3475 max words, 100 samples - at ./dataset/gen-word-3475-count.jsonl\n", + "Generated a single JSONL file with 147 samples (75 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n", + "Generated JSONL file with - 5775 max words, 100 samples - at ./dataset/gen-word-5775-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n", + "Generated JSONL file with - 2930 max words, 75 samples - at ./dataset/gen-word-2930-count.jsonl\n", + "Generated JSONL file with - 2420 max words, 75 samples - at ./dataset/gen-word-2420-count.jsonl\n", + "Generated JSONL file with - 2790 max words, 75 samples - at ./dataset/gen-word-2790-count.jsonl\n", + "Generated JSONL file with - 5625 max words, 100 samples - at ./dataset/gen-word-5625-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n", + "Generated JSONL file with - 2440 max words, 75 samples - at ./dataset/gen-word-2440-count.jsonl\n", + "Generated JSONL file with - 2460 max words, 75 samples - at ./dataset/gen-word-2460-count.jsonl\n", + "Generated a single JSONL file with 114 samples (75 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n", + "Generated a single JSONL file with 86 samples (75 token repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n", + "Generated a single JSONL file with 115 samples (75 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n", + "Generated a single JSONL file with 117 samples (75 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n", + "Generated JSONL file with - 2570 max words, 75 samples - at ./dataset/gen-word-2570-count.jsonl\n", + "Generated a single JSONL file with 108 samples (75 token repeat) - 2560 max words - at ./dataset/shuffle-word-2560-count.jsonl\n", + "Generated JSONL file with - 2490 max words, 75 samples - at ./dataset/gen-word-2490-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n", + "Generated a single JSONL file with 139 samples (75 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n", + "Generated a single JSONL file with 144 samples (75 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n", + "Generated a single JSONL file with 118 samples (75 token repeat) - 2600 max words - at 
./dataset/shuffle-word-2600-count.jsonl\n", + "Generated JSONL file with - 2630 max words, 75 samples - at ./dataset/gen-word-2630-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n", + "Generated JSONL file with - 2540 max words, 75 samples - at ./dataset/gen-word-2540-count.jsonl\n", + "Generated JSONL file with - 2480 max words, 75 samples - at ./dataset/gen-word-2480-count.jsonl\n", + "Generated JSONL file with - 2600 max words, 75 samples - at ./dataset/gen-word-2600-count.jsonl\n", + "Generated JSONL file with - 2610 max words, 75 samples - at ./dataset/gen-word-2610-count.jsonl\n", + "Generated JSONL file with - 2510 max words, 75 samples - at ./dataset/gen-word-2510-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", + "Generated a single JSONL file with 142 samples (75 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n", + "Generated a single JSONL file with 87 samples (75 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n", + "Generated JSONL file with - 2620 max words, 75 samples - at ./dataset/gen-word-2620-count.jsonl\n", + "Generated a single JSONL file with 108 samples (75 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n", + "Generated JSONL file with - 2500 max words, 75 samples - at ./dataset/gen-word-2500-count.jsonl\n", + "Generated a single JSONL file with 113 samples (75 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3125 max words - at ./dataset/shuffle-word-3125-count.jsonl\n", + "Generated JSONL file with - 2820 max words, 75 samples - at ./dataset/gen-word-2820-count.jsonl\n", + "Generated JSONL file with - 2680 max words, 75 samples - at ./dataset/gen-word-2680-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n", + "Generated JSONL file with - 2760 max words, 75 samples - at ./dataset/gen-word-2760-count.jsonl\n", + "Generated JSONL file with - 2550 max words, 75 samples - at ./dataset/gen-word-2550-count.jsonl\n", + "Generated JSONL file with - 2640 max words, 75 samples - at ./dataset/gen-word-2640-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2500 max words - at ./dataset/shuffle-word-2500-count.jsonl\n", + "Generated JSONL file with - 2660 max words, 75 samples - at ./dataset/gen-word-2660-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n", + "Generated JSONL file with - 2520 max words, 75 samples - at ./dataset/gen-word-2520-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2890 max words - at 
./dataset/shuffle-word-2890-count.jsonl\n", + "Generated JSONL file with - 2720 max words, 75 samples - at ./dataset/gen-word-2720-count.jsonl\n", + "Generated JSONL file with - 2830 max words, 75 samples - at ./dataset/gen-word-2830-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n", + "Generated JSONL file with - 2770 max words, 75 samples - at ./dataset/gen-word-2770-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4775 max words - at ./dataset/shuffle-word-4775-count.jsonl\n", + "Generated JSONL file with - 2690 max words, 75 samples - at ./dataset/gen-word-2690-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n", + "Generated JSONL file with - 3000 max words, 75 samples - at ./dataset/gen-word-3000-count.jsonl\n", + "Generated JSONL file with - 2950 max words, 75 samples - at ./dataset/gen-word-2950-count.jsonl\n", + "Generated JSONL file with - 2910 max words, 75 samples - at ./dataset/gen-word-2910-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n", + "Generated JSONL file with - 2860 max words, 75 samples - at ./dataset/gen-word-2860-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n", + "Generated a single JSONL file with 112 samples (75 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n", + "Generated JSONL file with - 2850 max words, 75 samples - at ./dataset/gen-word-2850-count.jsonl\n", + "Generated JSONL file with - 2890 max words, 75 samples - at ./dataset/gen-word-2890-count.jsonl\n", + "Generated JSONL file with - 2960 max words, 75 samples - at ./dataset/gen-word-2960-count.jsonl\n", + "Generated JSONL file with - 2900 max words, 75 samples - at ./dataset/gen-word-2900-count.jsonl\n", + "Generated JSONL file with - 2730 max words, 75 samples - at ./dataset/gen-word-2730-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4100 max words - at ./dataset/shuffle-word-4100-count.jsonl\n", + "Generated JSONL file with - 4050 max words, 100 samples - at ./dataset/gen-word-4050-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5025 max words - at ./dataset/shuffle-word-5025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token 
repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n", + "Generated JSONL file with - 2780 max words, 75 samples - at ./dataset/gen-word-2780-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max words - at ./dataset/shuffle-word-3475-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2770 max words - at ./dataset/shuffle-word-2770-count.jsonl\n", + "Generated JSONL file with - 2670 max words, 75 samples - at ./dataset/gen-word-2670-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n", + "Generated JSONL file with - 2840 max words, 75 samples - at ./dataset/gen-word-2840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5150 max words - at ./dataset/shuffle-word-5150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n", + "Generated JSONL file with - 3100 max words, 100 samples - at ./dataset/gen-word-3100-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n", + "Generated a single JSONL file with 87 samples (75 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n", + "Generated JSONL file with - 2880 max words, 75 samples - at ./dataset/gen-word-2880-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n", + "Generated JSONL file with - 2920 max words, 75 samples - at ./dataset/gen-word-2920-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", + "Generated JSONL file with - 2870 max words, 75 samples - at ./dataset/gen-word-2870-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n", + "Generated JSONL file with - 3200 max words, 100 samples - at 
./dataset/gen-word-3200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n", + "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", + "Generated JSONL file with - 2700 max words, 75 samples - at ./dataset/gen-word-2700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n", + "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4675 max words - at ./dataset/shuffle-word-4675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", + "Generated JSONL file with - 3075 max words, 100 samples - at ./dataset/gen-word-3075-count.jsonl\n", + "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", + "Generated JSONL file with - 4225 max words, 100 samples - at ./dataset/gen-word-4225-count.jsonl\n", + "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", + "Generated JSONL file with - 3225 max words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n", + "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n", + "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n", + "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonl\n", + "Generated JSONL file with - 3675 max words, 100 samples - at ./dataset/gen-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n", + "Generated JSONL file with - 5025 max words, 100 samples - at ./dataset/gen-word-5025-count.jsonl\n", + "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", + "Generated JSONL file with - 3125 max words, 100 samples - at ./dataset/gen-word-3125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4975 max words - at ./dataset/shuffle-word-4975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4525 max words - at ./dataset/shuffle-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4025 max words - at ./dataset/shuffle-word-4025-count.jsonl\n", + "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at 
./dataset/shuffle-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 max words - at ./dataset/shuffle-word-3350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3625 max words - at ./dataset/shuffle-word-3625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at ./dataset/shuffle-word-3650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5850 max words - at ./dataset/shuffle-word-5850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4425 max words - at ./dataset/shuffle-word-4425-count.jsonl\n", + "Generated JSONL file with - 3650 max words, 100 samples - at ./dataset/gen-word-3650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4350 max words - at ./dataset/shuffle-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4050 max words - at ./dataset/shuffle-word-4050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4200 max words - at ./dataset/shuffle-word-4200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4075 max words - at ./dataset/shuffle-word-4075-count.jsonl\n", + "Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 6000 max words - at ./dataset/shuffle-word-6000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", + "Generated JSONL file with - 3575 max words, 100 samples - at ./dataset/gen-word-3575-count.jsonl\n", + "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n", + "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n", + "Generated JSONL file with - 3725 max words, 100 samples - at ./dataset/gen-word-3725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4500 max words - at ./dataset/shuffle-word-4500-count.jsonl\n", + "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", + "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at 
./dataset/shuffle-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4125 max words - at ./dataset/shuffle-word-4125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", + "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", + "Generated JSONL file with - 3600 max words, 100 samples - at ./dataset/gen-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4950 max words - at ./dataset/shuffle-word-4950-count.jsonl\n", + "Generated JSONL file with - 4150 max words, 100 samples - at ./dataset/gen-word-4150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5600 max words - at ./dataset/shuffle-word-5600-count.jsonl\n", + "Generated JSONL file with - 3775 max words, 100 samples - at ./dataset/gen-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4150 max words - at ./dataset/shuffle-word-4150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5750 max words - at ./dataset/shuffle-word-5750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3800 max words - at ./dataset/shuffle-word-3800-count.jsonl\n", + "Generated JSONL file with - 3975 max words, 100 samples - at ./dataset/gen-word-3975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4600 max words - at ./dataset/shuffle-word-4600-count.jsonl\n", + "Generated JSONL file with - 4350 max words, 100 samples - at ./dataset/gen-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3850 max words - at ./dataset/shuffle-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4850 max words - at ./dataset/shuffle-word-4850-count.jsonl\n", + "Generated JSONL file with - 4250 max words, 100 samples - at ./dataset/gen-word-4250-count.jsonl\n", + "Generated JSONL file with - 4975 max words, 100 samples - at ./dataset/gen-word-4975-count.jsonl\n", + "Generated JSONL file with - 5450 max words, 100 samples - at ./dataset/gen-word-5450-count.jsonl\n", + "Generated JSONL file with - 4900 max words, 100 samples - at ./dataset/gen-word-4900-count.jsonl\n", + "Generated JSONL file with - 4200 max words, 100 samples - at ./dataset/gen-word-4200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4300 max words - at ./dataset/shuffle-word-4300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4375 max words - at ./dataset/shuffle-word-4375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4650 max words - at ./dataset/shuffle-word-4650-count.jsonl\n", + "Generated JSONL file with - 4375 max words, 100 samples - at ./dataset/gen-word-4375-count.jsonl\n", + "Generated JSONL file with - 3925 max words, 100 samples - at ./dataset/gen-word-3925-count.jsonl\n", + "Generated JSONL file with - 4025 max words, 100 samples - at ./dataset/gen-word-4025-count.jsonl\n", + "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", + "Generated JSONL file with - 3875 max words, 100 samples - 
at ./dataset/gen-word-3875-count.jsonl\n", + "Generated JSONL file with - 3850 max words, 100 samples - at ./dataset/gen-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4625 max words - at ./dataset/shuffle-word-4625-count.jsonl\n", + "Generated JSONL file with - 4100 max words, 100 samples - at ./dataset/gen-word-4100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4725 max words - at ./dataset/shuffle-word-4725-count.jsonl\n", + "Generated JSONL file with - 4400 max words, 100 samples - at ./dataset/gen-word-4400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4550 max words - at ./dataset/shuffle-word-4550-count.jsonl\n", + "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", + "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4475 max words - at ./dataset/shuffle-word-4475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4325 max words - at ./dataset/shuffle-word-4325-count.jsonl\n", + "Generated JSONL file with - 4075 max words, 100 samples - at ./dataset/gen-word-4075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4575 max words - at ./dataset/shuffle-word-4575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5125 max words - at ./dataset/shuffle-word-5125-count.jsonl\n", + "Generated JSONL file with - 4175 max words, 100 samples - at ./dataset/gen-word-4175-count.jsonl\n", + "Generated JSONL file with - 4825 max words, 100 samples - at ./dataset/gen-word-4825-count.jsonl\n", + "Generated JSONL file with - 5425 max words, 100 samples - at ./dataset/gen-word-5425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4400 max words - at ./dataset/shuffle-word-4400-count.jsonl\n", + "Generated JSONL file with - 4450 max words, 100 samples - at ./dataset/gen-word-4450-count.jsonl\n", + "Generated JSONL file with - 4125 max words, 100 samples - at ./dataset/gen-word-4125-count.jsonl\n", + "Generated JSONL file with - 4600 max words, 100 samples - at ./dataset/gen-word-4600-count.jsonl\n", + "Generated JSONL file with - 4625 max words, 100 samples - at ./dataset/gen-word-4625-count.jsonl\n", + "Generated JSONL file with - 4550 max words, 100 samples - at ./dataset/gen-word-4550-count.jsonl\n", + "Generated JSONL file with - 4325 max words, 100 samples - at ./dataset/gen-word-4325-count.jsonl\n", + "Generated JSONL file with - 4425 max words, 100 samples - at ./dataset/gen-word-4425-count.jsonl\n", + "Generated JSONL file with - 4850 max words, 100 samples - at ./dataset/gen-word-4850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5550 max words - at ./dataset/shuffle-word-5550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4925 max words - at ./dataset/shuffle-word-4925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4825 max words - at ./dataset/shuffle-word-4825-count.jsonl\n", + "Generated JSONL file with - 4500 max words, 100 samples - at ./dataset/gen-word-4500-count.jsonl\n", + "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples 
(100 token repeat) - 5300 max words - at ./dataset/shuffle-word-5300-count.jsonl\n", + "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4450 max words - at ./dataset/shuffle-word-4450-count.jsonl\n", + "Generated JSONL file with - 4675 max words, 100 samples - at ./dataset/gen-word-4675-count.jsonl\n", + "Generated JSONL file with - 4800 max words, 100 samples - at ./dataset/gen-word-4800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5525 max words - at ./dataset/shuffle-word-5525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4750 max words - at ./dataset/shuffle-word-4750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5000 max words - at ./dataset/shuffle-word-5000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4250 max words - at ./dataset/shuffle-word-4250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5375 max words - at ./dataset/shuffle-word-5375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5225 max words - at ./dataset/shuffle-word-5225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5050 max words - at ./dataset/shuffle-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5675 max words - at ./dataset/shuffle-word-5675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4875 max words - at ./dataset/shuffle-word-4875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5250 max words - at ./dataset/shuffle-word-5250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5700 max words - at ./dataset/shuffle-word-5700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5325 max words - at ./dataset/shuffle-word-5325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5100 max words - at ./dataset/shuffle-word-5100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5075 max words - at ./dataset/shuffle-word-5075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4900 max words - at ./dataset/shuffle-word-4900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4700 max words - at ./dataset/shuffle-word-4700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5175 max words - at ./dataset/shuffle-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5400 max words - at ./dataset/shuffle-word-5400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5275 max words - at ./dataset/shuffle-word-5275-count.jsonl\n", + "Generated JSONL file with - 5050 max words, 100 samples - at ./dataset/gen-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5725 max words - at ./dataset/shuffle-word-5725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4225 max words - at ./dataset/shuffle-word-4225-count.jsonl\n", + "Generated JSONL file with - 4750 max words, 100 samples - at 
./dataset/gen-word-4750-count.jsonl\n", + "Generated JSONL file with - 4525 max words, 100 samples - at ./dataset/gen-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5575 max words - at ./dataset/shuffle-word-5575-count.jsonl\n", + "Generated JSONL file with - 4725 max words, 100 samples - at ./dataset/gen-word-4725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5450 max words - at ./dataset/shuffle-word-5450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5475 max words - at ./dataset/shuffle-word-5475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5900 max words - at ./dataset/shuffle-word-5900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5625 max words - at ./dataset/shuffle-word-5625-count.jsonl\n", + "Generated JSONL file with - 4275 max words, 100 samples - at ./dataset/gen-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5925 max words - at ./dataset/shuffle-word-5925-count.jsonl\n", + "Generated JSONL file with - 5375 max words, 100 samples - at ./dataset/gen-word-5375-count.jsonl\n", + "Generated JSONL file with - 5175 max words, 100 samples - at ./dataset/gen-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5950 max words - at ./dataset/shuffle-word-5950-count.jsonl\n", + "Generated JSONL file with - 4700 max words, 100 samples - at ./dataset/gen-word-4700-count.jsonl\n", + "Generated JSONL file with - 4775 max words, 100 samples - at ./dataset/gen-word-4775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5500 max words - at ./dataset/shuffle-word-5500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5975 max words - at ./dataset/shuffle-word-5975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5775 max words - at ./dataset/shuffle-word-5775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5200 max words - at ./dataset/shuffle-word-5200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5875 max words - at ./dataset/shuffle-word-5875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5650 max words - at ./dataset/shuffle-word-5650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5800 max words - at ./dataset/shuffle-word-5800-count.jsonl\n", + "Generated JSONL file with - 4950 max words, 100 samples - at ./dataset/gen-word-4950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5425 max words - at ./dataset/shuffle-word-5425-count.jsonl\n", + "Generated JSONL file with - 5300 max words, 100 samples - at ./dataset/gen-word-5300-count.jsonl\n", + "Generated JSONL file with - 5000 max words, 100 samples - at ./dataset/gen-word-5000-count.jsonl\n", + "Generated JSONL file with - 5075 max words, 100 samples - at ./dataset/gen-word-5075-count.jsonl\n", + "Generated JSONL file with - 5325 max words, 100 samples - at ./dataset/gen-word-5325-count.jsonl\n", + "Generated JSONL file with - 5250 max words, 100 samples - at ./dataset/gen-word-5250-count.jsonl\n", + "Generated JSONL file with - 4575 max words, 100 samples - at ./dataset/gen-word-4575-count.jsonl\n", + "Generated JSONL file with - 
5400 max words, 100 samples - at ./dataset/gen-word-5400-count.jsonl\n", + "Generated JSONL file with - 5150 max words, 100 samples - at ./dataset/gen-word-5150-count.jsonl\n", + "Generated JSONL file with - 5875 max words, 100 samples - at ./dataset/gen-word-5875-count.jsonl\n", + "Generated JSONL file with - 5125 max words, 100 samples - at ./dataset/gen-word-5125-count.jsonl\n", + "Generated JSONL file with - 5725 max words, 100 samples - at ./dataset/gen-word-5725-count.jsonl\n", + "Generated JSONL file with - 5350 max words, 100 samples - at ./dataset/gen-word-5350-count.jsonl\n", + "Generated JSONL file with - 5225 max words, 100 samples - at ./dataset/gen-word-5225-count.jsonl\n", + "Generated JSONL file with - 5525 max words, 100 samples - at ./dataset/gen-word-5525-count.jsonl\n", + "Generated JSONL file with - 5500 max words, 100 samples - at ./dataset/gen-word-5500-count.jsonl\n", + "Generated JSONL file with - 5275 max words, 100 samples - at ./dataset/gen-word-5275-count.jsonl\n", + "Generated JSONL file with - 5600 max words, 100 samples - at ./dataset/gen-word-5600-count.jsonl\n", + "Generated JSONL file with - 5650 max words, 100 samples - at ./dataset/gen-word-5650-count.jsonl\n", + "Generated JSONL file with - 5550 max words, 100 samples - at ./dataset/gen-word-5550-count.jsonl\n", + "Generated JSONL file with - 4925 max words, 100 samples - at ./dataset/gen-word-4925-count.jsonl\n", + "Generated JSONL file with - 5575 max words, 100 samples - at ./dataset/gen-word-5575-count.jsonl\n", + "Generated JSONL file with - 5950 max words, 100 samples - at ./dataset/gen-word-5950-count.jsonl\n", + "Generated JSONL file with - 5825 max words, 100 samples - at ./dataset/gen-word-5825-count.jsonl\n", + "Generated JSONL file with - 5925 max words, 100 samples - at ./dataset/gen-word-5925-count.jsonl\n", + "Generated JSONL file with - 5475 max words, 100 samples - at ./dataset/gen-word-5475-count.jsonl\n", + "Generated JSONL file with - 5975 max words, 100 samples - at ./dataset/gen-word-5975-count.jsonl\n", + "Generated JSONL file with - 5900 max words, 100 samples - at ./dataset/gen-word-5900-count.jsonl\n", + "Generated JSONL file with - 5850 max words, 100 samples - at ./dataset/gen-word-5850-count.jsonl\n", + "Generated JSONL file with - 5700 max words, 100 samples - at ./dataset/gen-word-5700-count.jsonl\n", + "Generated JSONL file with - 6000 max words, 100 samples - at ./dataset/gen-word-6000-count.jsonl\n", + "Generated JSONL file with - 5750 max words, 100 samples - at ./dataset/gen-word-5750-count.jsonl\n", + "Generated JSONL file with - 5800 max words, 100 samples - at ./dataset/gen-word-5800-count.jsonl\n", + "Generated JSONL file with - 5675 max words, 100 samples - at ./dataset/gen-word-5675-count.jsonl\n", + "## Done ##\n", + "total 1.8G\n", + "drwxr-xr-x 2 root root 40K Jan 23 20:09 .\n", + "drwxr-xr-x 5 root root 4.0K Jan 23 20:09 ..\n", + "-rw-r--r-- 1 root root 20K Jan 23 20:09 gen-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 106K Jan 23 20:09 gen-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 732K Jan 23 20:09 gen-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 746K Jan 23 20:09 gen-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 749K Jan 23 20:09 gen-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 757K Jan 23 20:09 gen-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 762K Jan 23 20:09 gen-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 785K Jan 23 20:09 gen-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 774K Jan 23 20:09 
gen-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 787K Jan 23 20:09 gen-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 794K Jan 23 20:09 gen-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 809K Jan 23 20:09 gen-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 88K Jan 23 20:09 gen-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 805K Jan 23 20:09 gen-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 818K Jan 23 20:09 gen-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 817K Jan 23 20:09 gen-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 832K Jan 23 20:09 gen-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 844K Jan 23 20:09 gen-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 855K Jan 23 20:09 gen-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 852K Jan 23 20:09 gen-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 868K Jan 23 20:09 gen-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 871K Jan 23 20:09 gen-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 875K Jan 23 20:09 gen-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 93K Jan 23 20:09 gen-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 883K Jan 23 20:09 gen-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 881K Jan 23 20:09 gen-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 898K Jan 23 20:09 gen-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 902K Jan 23 20:09 gen-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 924K Jan 23 20:09 gen-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 916K Jan 23 20:09 gen-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 928K Jan 23 20:09 gen-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 939K Jan 23 20:09 gen-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 940K Jan 23 20:09 gen-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 956K Jan 23 20:09 gen-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 105K Jan 23 20:09 gen-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 945K Jan 23 20:09 gen-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 968K Jan 23 20:09 gen-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 968K Jan 23 20:09 gen-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 981K Jan 23 20:09 gen-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 981K Jan 23 20:09 gen-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 993K Jan 23 20:09 gen-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 1001K Jan 23 20:09 gen-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 1015K Jan 23 20:09 gen-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1014K Jan 23 20:09 gen-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1019K Jan 23 20:09 gen-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 109K Jan 23 20:09 gen-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 1023K Jan 23 20:09 gen-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 25K Jan 23 20:09 
gen-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 116K Jan 23 20:09 gen-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 127K Jan 23 20:09 gen-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 133K Jan 23 20:09 gen-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 140K Jan 23 20:09 gen-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 147K Jan 23 20:09 gen-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 
gen-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 30K Jan 23 20:09 gen-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 154K Jan 23 20:09 gen-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 162K Jan 23 20:09 gen-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 169K Jan 23 20:09 gen-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 176K Jan 23 20:09 gen-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 
gen-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 186K Jan 23 20:09 gen-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 34K Jan 23 20:09 gen-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 193K Jan 23 20:09 gen-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 200K Jan 23 20:09 gen-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 204K Jan 23 20:09 gen-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 
gen-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 213K Jan 23 20:09 gen-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 219K Jan 23 20:09 gen-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 40K Jan 23 20:09 gen-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 228K Jan 23 20:09 gen-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 gen-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 gen-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 239K Jan 23 20:09 gen-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 244K Jan 23 20:09 gen-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 247K Jan 23 20:09 gen-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 gen-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 gen-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 gen-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 261K Jan 23 20:09 gen-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 
gen-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 45K Jan 23 20:09 gen-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 265K Jan 23 20:09 gen-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 273K Jan 23 20:09 gen-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 283K Jan 23 20:09 gen-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 287K Jan 23 20:09 gen-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 294K Jan 23 20:09 gen-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 49K Jan 23 20:09 gen-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 301K Jan 23 20:09 gen-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 20:09 gen-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 20:09 gen-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 20:09 gen-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 304K Jan 23 20:09 gen-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 314K Jan 23 20:09 gen-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 319K Jan 23 20:09 gen-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 
gen-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 333K Jan 23 20:09 gen-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 54K Jan 23 20:09 gen-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 339K Jan 23 20:09 gen-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 348K Jan 23 20:09 gen-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 355K Jan 23 20:09 gen-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 359K Jan 23 20:09 gen-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 358K Jan 23 20:09 gen-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 20:09 gen-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 60K Jan 23 20:09 gen-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 372K Jan 23 20:09 gen-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 385K Jan 23 20:09 gen-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 389K Jan 23 20:09 gen-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 
gen-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 391K Jan 23 20:09 gen-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 398K Jan 23 20:09 gen-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 63K Jan 23 20:09 gen-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 411K Jan 23 20:09 gen-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 20:09 gen-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 20:09 gen-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 20:09 gen-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 423K Jan 23 20:09 gen-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 423K Jan 23 20:09 gen-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 433K Jan 23 20:09 gen-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 435K Jan 23 20:09 gen-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 20:09 gen-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 20:09 gen-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 5.8M Jan 23 20:09 gen-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 70K Jan 23 20:09 gen-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 445K Jan 23 20:09 gen-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.8M Jan 23 20:09 gen-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 456K Jan 23 20:09 gen-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 455K Jan 23 20:09 gen-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 468K Jan 23 20:09 gen-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 474K Jan 23 20:09 gen-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 71K Jan 23 20:09 gen-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 486K Jan 23 20:09 gen-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 492K Jan 23 20:09 gen-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 492K Jan 23 20:09 gen-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 509K Jan 23 20:09 
gen-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 507K Jan 23 20:09 gen-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 78K Jan 23 20:09 gen-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 518K Jan 23 20:09 gen-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 524K Jan 23 20:09 gen-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 530K Jan 23 20:09 gen-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 534K Jan 23 20:09 gen-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 552K Jan 23 20:09 gen-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 82K Jan 23 20:09 gen-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 555K Jan 23 20:09 gen-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 566K Jan 23 20:09 gen-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 575K Jan 23 20:09 gen-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 571K Jan 23 20:09 gen-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 581K Jan 23 20:09 gen-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 87K Jan 23 20:09 gen-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 593K Jan 23 20:09 gen-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 595K Jan 23 20:09 gen-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 601K Jan 23 20:09 gen-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 614K Jan 23 20:09 gen-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 623K Jan 23 20:09 gen-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 93K Jan 23 20:09 gen-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 631K Jan 23 20:09 gen-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 628K Jan 23 20:09 gen-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 645K Jan 23 20:09 gen-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 649K Jan 23 20:09 gen-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 656K Jan 23 20:09 gen-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 98K Jan 23 20:09 gen-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 667K Jan 23 20:09 gen-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 671K Jan 23 20:09 gen-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 680K Jan 23 20:09 gen-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 690K Jan 23 20:09 gen-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 692K Jan 23 20:09 gen-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 103K Jan 23 20:09 gen-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 694K Jan 23 20:09 gen-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 713K Jan 23 20:09 gen-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 714K Jan 23 20:09 gen-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 725K Jan 23 20:09 gen-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 732K Jan 23 20:09 gen-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 shuffle-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 20:09 shuffle-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 
shuffle-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 shuffle-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 shuffle-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 shuffle-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1490-count.jsonl\n", + "-rw-r--r-- 
1 root root 4.2M Jan 23 20:09 shuffle-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 
shuffle-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 shuffle-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-230-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.0M Jan 23 20:09 shuffle-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 shuffle-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 
shuffle-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 shuffle-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3225-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.5M Jan 23 20:09 shuffle-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 shuffle-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 shuffle-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 
shuffle-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 shuffle-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 8.0M Jan 23 20:09 shuffle-word-5-count.jsonl\n", + "-rw-r--r-- 1 root 
root 3.1M Jan 23 20:09 shuffle-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 shuffle-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5850-count.jsonl\n", 
+ "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 shuffle-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 20:09 shuffle-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-910-count.jsonl\n", + "-rw-r--r-- 
1 root root 2.0M Jan 23 20:09 shuffle-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 20:09 shuffle-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 12K Jan 23 20:09 word-2-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 20:09 word-4-count.jsonl\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word repetition dataset ##\"\n", + "\n", + "#\n", + "# Training set for <= 100 words\n", + "# This is used to fill up as many blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100+ - 3000 words dataset\n", + "# \n", + "for i in {110..3000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 75 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 75 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 3000+ - 6000 words dataset\n", + "# \n", + "for i in {3025..6000..25} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e66e145e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resolving data files: 100%|███████████████| 862/862 [00:00<00:00, 148632.68it/s]\n", + "Filter (num_proc=96): 16%|▊ | 48773/312109 [00:25<02:19, 1882.75 examples/s]\n", + "Map (num_proc=96): 100%|██████| 307741/307741 [00:05<00:00, 54820.73 examples/s]\n", + "Map (num_proc=96): 100%|███████| 307741/307741 [00:42<00:00, 7168.62 examples/s]\n", + "Map (num_proc=96): 100%|█████████| 36846/36846 [00:13<00:00, 2761.34 examples/s]\n", + "Saving the dataset (4/4 shards): 100%|█| 36846/36846 [00:01<00:00, 23444.71 exam\n", + "Saving the dataset (1/1 shards): 100%|█| 1547/1547 [00:00<00:00, 34203.75 exampl\n" + ] + } + ], + "source": [ + "# Let's pre-tokenize the required dataset\n", + "# and pack the data into 8k of length\n", + 
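"# (the packing length itself should come from the data settings in stage-2-tune.yaml, the config passed to preload_datapath.py below)\n", + 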
"#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# Then to do 8k chunks, batchsize 8, with 16k datapacks. Why? I dun know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "6413a747", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : The actual tune!" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0bdba654", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-23 20:13:18,992] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-3B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-3B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=8192'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-3B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-3B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=8192'].\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 3745190225\n", + "Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 256\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 4\n", + " - effective_batch_size: 256\n", + "\n", + "[rank: 0] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-23 20:13:51,892] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:51,961] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,008] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,062] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,079] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,080] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,114] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 1] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 3] Seed set to 3745190225\n", + "[rank: 2] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. 
It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 7] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 6] Seed set to 3745190225\n", + "[rank: 4] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 5] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 6] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 4] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 7] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 3] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 1] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 2] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 5] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240123_201446-ldvjsc3w\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8xA100] RWKV-v5-3B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/ldvjsc3w\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 2.000e-04 (0.0002)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.014776945114135742 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...Loading extension module fused_adam...\n", + "\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10196971893310547 seconds\n", + "Time to load fused_adam op: 0.1021115779876709 seconds\n", + "Time to load fused_adam op: 0.10188078880310059 seconds\n", + "Time to load fused_adam op: 0.10222482681274414 secondsTime to load fused_adam op: 0.10220718383789062 seconds\n", + "\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10234403610229492 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.1023564338684082 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 167 M \n", + "1 | blocks | ModuleList | 2.7 B \n", + "2 | ln_out | LayerNorm | 5.1 K \n", + "3 | head | Linear | 167 M \n", + "--------------------------------------\n", + "3.1 B Trainable params\n", + "0 Non-trainable params\n", + "3.1 B Total params\n", + "12,251.996Total estimated model params size (MB)\n", + "Epoch 0: 17%|▏| 100/576 [18:37<1:28:41, 0.09it/s, v_num=sc3w, train/loss=0.007/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|█| 576/576 [1:37:18<00:00, 0.10it/s, v_num=sc3w, train/loss=1.270\n", + "Validation: | | 0/? 
[00:00 This project assumes you have the rwkv-infctx conda env setup, and you are executing in that environment - see the main README.md for the conda env setup steps" + ] + }, + { + "cell_type": "markdown", + "id": "f6aac483", + "metadata": { + "papermill": { + "duration": 0.004873, + "end_time": "2024-01-23T07:52:11.188157", + "exception": false, + "start_time": "2024-01-23T07:52:11.183284", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Configure your environment settings\n", + "(!Important: you will need to rerun the below cell, if you restart your kernel)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0c538903", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:11.196870Z", + "iopub.status.busy": "2024-01-23T07:52:11.196676Z", + "iopub.status.idle": "2024-01-23T07:52:11.207800Z", + "shell.execute_reply": "2024-01-23T07:52:11.206964Z" + }, + "papermill": { + "duration": 0.017119, + "end_time": "2024-01-23T07:52:11.210197", + "exception": false, + "start_time": "2024-01-23T07:52:11.193078", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_2\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_2\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "WANDB_PREFIX=\"[8xA100] RWKV-v5-7B-World\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# The model sizing\n", + "MODEL_NAME=\"RWKV-v5-7B-world.pth\"\n", + "MODEL_URL=\"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "MEMORY_SCRIPT_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./notebook/util-scripts/memory_script\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "markdown", + "id": "68a6f8e5", + "metadata": { + "papermill": { + "duration": 0.00356, + "end_time": "2024-01-23T07:52:11.220001", + "exception": false, + "start_time": "2024-01-23T07:52:11.216441", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Download the pretrained model\n", + "(if you want to skip the the basemodel train + instruct tune)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d61f8a7a", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:11.226610Z", + "iopub.status.busy": "2024-01-23T07:52:11.225973Z", + "iopub.status.idle": "2024-01-23T07:52:11.717381Z", + "shell.execute_reply": "2024-01-23T07:52:11.716269Z" + }, + "papermill": { + "duration": 0.497049, + "end_time": 
"2024-01-23T07:52:11.719954", + "exception": false, + "start_time": "2024-01-23T07:52:11.222905", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets wget the model files\n", + "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n", + "!cd \"{PROJECT_DIR}/model\" && \\\n", + " wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "3928b63f", + "metadata": { + "papermill": { + "duration": 0.004645, + "end_time": "2024-01-23T07:52:11.730080", + "exception": false, + "start_time": "2024-01-23T07:52:11.725435", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 2*2k) : Dataset preperation\n", + "\n", + "Stage 1, handles total context size of 4096. Meaning it will be tuned for memory task of approximately 2k tokens of size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b100d015", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:11.737515Z", + "iopub.status.busy": "2024-01-23T07:52:11.736355Z", + "iopub.status.idle": "2024-01-23T07:52:15.468489Z", + "shell.execute_reply": "2024-01-23T07:52:15.467116Z" + }, + "papermill": { + "duration": 3.738786, + "end_time": "2024-01-23T07:52:15.471307", + "exception": false, + "start_time": "2024-01-23T07:52:11.732521", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Folder and eval pip setup\n", + "!cp -r \"{MEMORY_SCRIPT_DIR}/\" \"{NOTEBOOK_DIR}/\"\n", + "!python3 -m pip install rwkv asyncio aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a80b46d0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:15.483048Z", + "iopub.status.busy": "2024-01-23T07:52:15.482635Z", + "iopub.status.idle": "2024-01-23T07:52:18.671801Z", + "shell.execute_reply": "2024-01-23T07:52:18.670751Z" + }, + "papermill": { + "duration": 3.348865, + "end_time": "2024-01-23T07:52:18.826676", + "exception": false, + "start_time": "2024-01-23T07:52:15.477811", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# Training set for < 100 words\n", + "# This is used to fill up as much blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 150 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 50+ - 400 words dataset\n", + "# \n", + "for i in {110..200..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 125 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 
210 - 4000 words dataset\n", + "# \n", + "for i in {210..4000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e0376d7", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:52:19.064200Z", + "iopub.status.busy": "2024-01-23T07:52:19.063812Z", + "iopub.status.idle": "2024-01-23T07:55:09.987257Z", + "shell.execute_reply": "2024-01-23T07:55:09.985659Z" + }, + "papermill": { + "duration": 171.009128, + "end_time": "2024-01-23T07:55:09.990364", + "exception": false, + "start_time": "2024-01-23T07:52:18.981236", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's pre-tokenize the required dataset\n", + "# and pack the data into 8k of length\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# than to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-1-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-1-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "4a778ddb", + "metadata": { + "papermill": { + "duration": 0.114062, + "end_time": "2024-01-23T07:55:10.231871", + "exception": false, + "start_time": "2024-01-23T07:55:10.117809", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 2*2k) : The actual tune!"
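For reference, the word-repetition records produced by the dataset generation step above are simple prompt/completion pairs in JSONL form. The sketch below is illustrative only: the exact field names, instruction wording, and word pool used by gen_limited_prompt_completion_jsonl.py are assumptions, not taken from the script itself.

```python
# Illustrative sketch (assumed record format, not the actual memory_script output):
# one JSON object per line, pairing a "repeat this" prompt with its expected completion.
import json
import os
import random

os.makedirs("./dataset", exist_ok=True)

WORDS = ["apple", "river", "stone", "cloud", "ember", "willow"]  # hypothetical word pool

def gen_repeat_sample(word_count: int) -> dict:
    text = " ".join(random.choice(WORDS) for _ in range(word_count))
    return {
        "prompt": f"Memorise and repeat the following text:\n{text}\n",
        "completion": text,
    }

with open("./dataset/example-word-10-count.jsonl", "w") as f:
    for _ in range(100):
        f.write(json.dumps(gen_repeat_sample(10)) + "\n")
```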
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2b4f921", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T07:55:10.463735Z", + "iopub.status.busy": "2024-01-23T07:55:10.463303Z", + "iopub.status.idle": "2024-01-23T11:18:37.403552Z", + "shell.execute_reply": "2024-01-23T11:18:37.402122Z" + }, + "papermill": { + "duration": 12207.060283, + "end_time": "2024-01-23T11:18:37.406917", + "exception": false, + "start_time": "2024-01-23T07:55:10.346634", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-1-tune.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-1 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=4 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae68ae18", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:18:37.833406Z", + "iopub.status.busy": "2024-01-23T11:18:37.832966Z", + "iopub.status.idle": "2024-01-23T11:19:35.452202Z", + "shell.execute_reply": "2024-01-23T11:19:35.450809Z" + }, + "papermill": { + "duration": 57.804367, + "end_time": "2024-01-23T11:19:35.454988", + "exception": false, + "start_time": "2024-01-23T11:18:37.650621", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30726953", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:22:23.370145Z", + "iopub.status.busy": "2024-01-23T11:22:23.369718Z", + "iopub.status.idle": "2024-01-23T11:31:09.313399Z", + "shell.execute_reply": "2024-01-23T11:31:09.312027Z" + }, + "papermill": { + "duration": 526.138711, + "end_time": "2024-01-23T11:31:09.316221", + "exception": false, + "start_time": "2024-01-23T11:22:23.177510", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\" \"none\" 1000 3000" + ] + }, + { + "cell_type": "markdown", + "id": "ddec3af2", + "metadata": {}, + "source": [ + "## Finetune 2 (0 -> 2*4k) : Dataset preperation\n", + "\n", + "Stage 2, handles total context size of 8k. Meaning it will be tuned for memory task of approximately 4k tokens of size." 
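Not part of the original notebook, but a quick way to confirm the checkpoint export above produced a usable weight file is to load the exported .pth on CPU and inspect a few tensors. This sketch assumes export_checkpoint.py writes a flat PyTorch state dict and that enough host RAM is available for the 7B weights; the path mirrors the trainer-relative path used in the export cell.

```python
# Hypothetical sanity check for the exported stage-1 weights (assumes a flat state dict).
import torch

# Path is relative to the trainer directory, mirroring the export cell above.
EXPORTED = "../model/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth"

state = torch.load(EXPORTED, map_location="cpu")  # needs enough host RAM for the 7B weights
print(f"{len(state)} tensors loaded")
for name, tensor in list(state.items())[:5]:
    print(name, tuple(tensor.shape), tensor.dtype)
```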
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b3cafb4", + "metadata": {}, + "outputs": [], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word repetition dataset ##\"\n", + "\n", + "#\n", + "# Training set for <= 100 words\n", + "# This is used to fill up as many blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100+ - 3000 words dataset\n", + "# \n", + "for i in {110..3000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 75 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 75 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 3000+ - 6000 words dataset\n", + "# \n", + "for i in {3000..6000..25} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60e9c3ab", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's pre-tokenize the required dataset\n", + "# and pack the data into 8k of length\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# than to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "2b1343c6", + "metadata": {}, + "source": [ + "## Finetune 2 (0 -> 2*4k) : The actual tune!"
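The bash cells above fan out one generator process per word count and then `wait` for all of them to finish. If a pure-Python equivalent is easier to tweak, something along these lines should behave the same way; the script path and sample counts mirror the 100-to-3000-word loop above, while the worker count is an arbitrary choice and not taken from the repo.

```python
# Rough Python equivalent of the bash fan-out + `wait` above (illustrative, not in the repo).
import subprocess
from concurrent.futures import ThreadPoolExecutor

def gen(word_count: int, samples: int) -> None:
    # Same generator script the notebook calls, one JSONL file per word count
    subprocess.run([
        "python", "./memory_script/gen_limited_prompt_completion_jsonl.py",
        f"./dataset/gen-word-{word_count}-count.jsonl", str(word_count), str(samples),
    ], check=True)

with ThreadPoolExecutor(max_workers=16) as pool:
    futures = [pool.submit(gen, wc, 75) for wc in range(110, 3001, 10)]
    for future in futures:
        future.result()  # equivalent of the trailing `wait`
```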
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a120238c", + "metadata": {}, + "outputs": [], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-2-tune.yaml\" \\\n", + " --model.load_model=\"../model/Memory-Tune-Stage-1-{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-2 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=4 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d4041ad", + "metadata": {}, + "outputs": [], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a793df6", + "metadata": {}, + "outputs": [], + "source": [ + "# Lets do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 1000 4000\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 4000 8000" + ] + }, + { + "cell_type": "markdown", + "id": "9cc1c2df", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : Dataset preperation\n", + "\n", + "Stage 2, handles total context size of 8k. Meaning it will be tuned for memory task of approximately 4k tokens of size." 
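The memory evals above are issued one command at a time; a small loop (hypothetical, not part of the notebook) can sweep both exported stages and the token ranges used above in one go. The "none" argument and the range values simply mirror the invocations shown, and PROJECT_DIR is recomputed the same way the environment-setup cell does it.

```python
# Hypothetical convenience wrapper around the eval commands shown above.
import os
import subprocess

PROJECT_DIR = os.path.abspath(os.path.join(os.getcwd(), "../../../../"))
MODEL_NAME = "RWKV-v5-7B-world.pth"

STAGES = ["Memory-Tune-Stage-1", "Memory-Tune-Stage-2"]
RANGES = [None, (1000, 4000), (4000, 8000)]  # None = the eval script's default range

for stage in STAGES:
    model_path = f"{PROJECT_DIR}/model/{stage}-{MODEL_NAME}"
    for token_range in RANGES:
        cmd = ["python3", "./memory_script/eval_v5_memory_guided.py", model_path]
        if token_range is not None:
            cmd += ["none", str(token_range[0]), str(token_range[1])]
        subprocess.run(cmd, check=True)
```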
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bae4ec97", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n", + "Generated JSONL file with - 15 max words, 100 samples - at ./dataset/gen-word-15-count.jsonl\n", + "Generated JSONL file with - 4 max words, 100 samples - at ./dataset/word-4-count.jsonl\n", + "Generated JSONL file with - 2 max words, 100 samples - at ./dataset/word-2-count.jsonl\n", + "Generated JSONL file with - 5 max words, 100 samples - at ./dataset/gen-word-5-count.jsonlGenerated JSONL file with - 10 max words, 100 samples - at ./dataset/gen-word-10-count.jsonl\n", + "\n", + "Generated JSONL file with - 30 max words, 100 samples - at ./dataset/gen-word-30-count.jsonl\n", + "Generated JSONL file with - 35 max words, 100 samples - at ./dataset/gen-word-35-count.jsonl\n", + "Generated JSONL file with - 25 max words, 100 samples - at ./dataset/gen-word-25-count.jsonl\n", + "Generated JSONL file with - 20 max words, 100 samples - at ./dataset/gen-word-20-count.jsonl\n", + "Generated JSONL file with - 50 max words, 100 samples - at ./dataset/gen-word-50-count.jsonl\n", + "Generated JSONL file with - 40 max words, 100 samples - at ./dataset/gen-word-40-count.jsonl\n", + "Generated JSONL file with - 70 max words, 100 samples - at ./dataset/gen-word-70-count.jsonl\n", + "Generated JSONL file with - 80 max words, 100 samples - at ./dataset/gen-word-80-count.jsonl\n", + "Generated JSONL file with - 55 max words, 100 samples - at ./dataset/gen-word-55-count.jsonl\n", + "Generated JSONL file with - 75 max words, 100 samples - at ./dataset/gen-word-75-count.jsonl\n", + "Generated JSONL file with - 95 max words, 100 samples - at ./dataset/gen-word-95-count.jsonl\n", + "Generated JSONL file with - 45 max words, 100 samples - at ./dataset/gen-word-45-count.jsonl\n", + "Generated JSONL file with - 60 max words, 100 samples - at ./dataset/gen-word-60-count.jsonl\n", + "Generated JSONL file with - 120 max words, 75 samples - at ./dataset/gen-word-120-count.jsonl\n", + "Generated JSONL file with - 90 max words, 100 samples - at ./dataset/gen-word-90-count.jsonl\n", + "Generated JSONL file with - 100 max words, 100 samples - at ./dataset/gen-word-100-count.jsonl\n", + "Generated JSONL file with - 110 max words, 75 samples - at ./dataset/gen-word-110-count.jsonl\n", + "Generated JSONL file with - 130 max words, 75 samples - at ./dataset/gen-word-130-count.jsonl\n", + "Generated JSONL file with - 140 max words, 75 samples - at ./dataset/gen-word-140-count.jsonl\n", + "Generated JSONL file with - 85 max words, 100 samples - at ./dataset/gen-word-85-count.jsonl\n", + "Generated JSONL file with - 150 max words, 75 samples - at ./dataset/gen-word-150-count.jsonl\n", + "Generated JSONL file with - 180 max words, 75 samples - at ./dataset/gen-word-180-count.jsonl\n", + "Generated JSONL file with - 220 max words, 75 samples - at ./dataset/gen-word-220-count.jsonl\n", + "Generated JSONL file with - 250 max words, 75 samples - at ./dataset/gen-word-250-count.jsonl\n", + "Generated JSONL file with - 260 max words, 75 samples - at ./dataset/gen-word-260-count.jsonl\n", + "Generated JSONL file with - 65 max words, 100 samples - at ./dataset/gen-word-65-count.jsonl\n", + "Generated JSONL file with - 190 max words, 75 samples - at ./dataset/gen-word-190-count.jsonl\n", + "Generated JSONL file with - 200 max words, 75 samples - at ./dataset/gen-word-200-count.jsonl\n", + "Generated JSONL file with - 
170 max words, 75 samples - at ./dataset/gen-word-170-count.jsonl\n", + "Generated JSONL file with - 290 max words, 75 samples - at ./dataset/gen-word-290-count.jsonl\n", + "Generated JSONL file with - 240 max words, 75 samples - at ./dataset/gen-word-240-count.jsonl\n", + "Generated JSONL file with - 310 max words, 75 samples - at ./dataset/gen-word-310-count.jsonl\n", + "Generated JSONL file with - 380 max words, 75 samples - at ./dataset/gen-word-380-count.jsonl\n", + "Generated JSONL file with - 350 max words, 75 samples - at ./dataset/gen-word-350-count.jsonl\n", + "Generated JSONL file with - 330 max words, 75 samples - at ./dataset/gen-word-330-count.jsonl\n", + "Generated JSONL file with - 340 max words, 75 samples - at ./dataset/gen-word-340-count.jsonl\n", + "Generated JSONL file with - 230 max words, 75 samples - at ./dataset/gen-word-230-count.jsonl\n", + "Generated JSONL file with - 210 max words, 75 samples - at ./dataset/gen-word-210-count.jsonl\n", + "Generated JSONL file with - 160 max words, 75 samples - at ./dataset/gen-word-160-count.jsonl\n", + "Generated a single JSONL file with 1063 samples (75 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", + "Generated a single JSONL file with 1288 samples (75 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", + "Generated a single JSONL file with 1481 samples (75 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", + "Generated JSONL file with - 470 max words, 75 samples - at ./dataset/gen-word-470-count.jsonl\n", + "Generated a single JSONL file with 1381 samples (75 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", + "Generated a single JSONL file with 1158 samples (75 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", + "Generated a single JSONL file with 730 samples (75 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", + "Generated JSONL file with - 490 max words, 75 samples - at ./dataset/gen-word-490-count.jsonl\n", + "Generated a single JSONL file with 797 samples (75 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", + "Generated JSONL file with - 370 max words, 75 samples - at ./dataset/gen-word-370-count.jsonl\n", + "Generated JSONL file with - 360 max words, 75 samples - at ./dataset/gen-word-360-count.jsonl\n", + "Generated JSONL file with - 390 max words, 75 samples - at ./dataset/gen-word-390-count.jsonl\n", + "Generated a single JSONL file with 1215 samples (75 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", + "Generated a single JSONL file with 685 samples (75 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", + "Generated JSONL file with - 450 max words, 75 samples - at ./dataset/gen-word-450-count.jsonl\n", + "Generated JSONL file with - 530 max words, 75 samples - at ./dataset/gen-word-530-count.jsonl\n", + "Generated a single JSONL file with 707 samples (75 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", + "Generated a single JSONL file with 750 samples (75 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", + "Generated a single JSONL file with 688 
samples (75 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", + "Generated JSONL file with - 430 max words, 75 samples - at ./dataset/gen-word-430-count.jsonl\n", + "Generated JSONL file with - 510 max words, 75 samples - at ./dataset/gen-word-510-count.jsonl\n", + "Generated JSONL file with - 550 max words, 75 samples - at ./dataset/gen-word-550-count.jsonl\n", + "Generated a single JSONL file with 3293 samples (100 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", + "Generated JSONL file with - 580 max words, 75 samples - at ./dataset/gen-word-580-count.jsonl\n", + "Generated a single JSONL file with 524 samples (75 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", + "Generated a single JSONL file with 697 samples (75 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", + "Generated JSONL file with - 300 max words, 75 samples - at ./dataset/gen-word-300-count.jsonl\n", + "Generated JSONL file with - 710 max words, 75 samples - at ./dataset/gen-word-710-count.jsonl\n", + "Generated JSONL file with - 810 max words, 75 samples - at ./dataset/gen-word-810-count.jsonl\n", + "Generated a single JSONL file with 2937 samples (100 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", + "Generated a single JSONL file with 3532 samples (100 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", + "Generated a single JSONL file with 2671 samples (100 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", + "Generated a single JSONL file with 3130 samples (100 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", + "Generated JSONL file with - 700 max words, 75 samples - at ./dataset/gen-word-700-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", + "Generated JSONL file with - 730 max words, 75 samples - at ./dataset/gen-word-730-count.jsonl\n", + "Generated JSONL file with - 1030 max words, 75 samples - at ./dataset/gen-word-1030-count.jsonl\n", + "Generated JSONL file with - 570 max words, 75 samples - at ./dataset/gen-word-570-count.jsonl\n", + "Generated JSONL file with - 630 max words, 75 samples - at ./dataset/gen-word-630-count.jsonl\n", + "Generated JSONL file with - 870 max words, 75 samples - at ./dataset/gen-word-870-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", + "Generated JSONL file with - 280 max words, 75 samples - at ./dataset/gen-word-280-count.jsonl\n", + "Generated JSONL file with - 420 max words, 75 samples - at ./dataset/gen-word-420-count.jsonl\n", + "Generated a single JSONL file with 2794 samples (100 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n", + "Generated a single JSONL file with 1037 samples (75 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", + "Generated JSONL file with - 780 max words, 75 samples - at ./dataset/gen-word-780-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", + "Generated JSONL file with - 440 max words, 75 samples - at ./dataset/gen-word-440-count.jsonl\n", + "Generated a single JSONL file with 521 samples (75 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", + "Generated JSONL file 
with - 590 max words, 75 samples - at ./dataset/gen-word-590-count.jsonl\n", + "Generated JSONL file with - 760 max words, 75 samples - at ./dataset/gen-word-760-count.jsonl\n", + "Generated JSONL file with - 640 max words, 75 samples - at ./dataset/gen-word-640-count.jsonl\n", + "Generated a single JSONL file with 3770 samples (100 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", + "Generated JSONL file with - 480 max words, 75 samples - at ./dataset/gen-word-480-count.jsonl\n", + "Generated JSONL file with - 740 max words, 75 samples - at ./dataset/gen-word-740-count.jsonl\n", + "Generated JSONL file with - 770 max words, 75 samples - at ./dataset/gen-word-770-count.jsonl\n", + "Generated JSONL file with - 880 max words, 75 samples - at ./dataset/gen-word-880-count.jsonl\n", + "Generated a single JSONL file with 4805 samples (100 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", + "Generated a single JSONL file with 4074 samples (100 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonl\n", + "Generated a single JSONL file with 529 samples (75 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", + "Generated a single JSONL file with 5231 samples (100 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", + "Generated JSONL file with - 270 max words, 75 samples - at ./dataset/gen-word-270-count.jsonl\n", + "Generated JSONL file with - 610 max words, 75 samples - at ./dataset/gen-word-610-count.jsonl\n", + "Generated JSONL file with - 750 max words, 75 samples - at ./dataset/gen-word-750-count.jsonl\n", + "Generated JSONL file with - 860 max words, 75 samples - at ./dataset/gen-word-860-count.jsonl\n", + "Generated JSONL file with - 500 max words, 75 samples - at ./dataset/gen-word-500-count.jsonl\n", + "Generated a single JSONL file with 682 samples (75 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", + "Generated JSONL file with - 680 max words, 75 samples - at ./dataset/gen-word-680-count.jsonl\n", + "Generated a single JSONL file with 7562 samples (100 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", + "Generated JSONL file with - 540 max words, 75 samples - at ./dataset/gen-word-540-count.jsonl\n", + "Generated a single JSONL file with 754 samples (75 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", + "Generated JSONL file with - 400 max words, 75 samples - at ./dataset/gen-word-400-count.jsonl\n", + "Generated JSONL file with - 460 max words, 75 samples - at ./dataset/gen-word-460-count.jsonl\n", + "Generated JSONL file with - 520 max words, 75 samples - at ./dataset/gen-word-520-count.jsonl\n", + "Generated a single JSONL file with 434 samples (75 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", + "Generated a single JSONL file with 10604 samples (100 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n", + "Generated JSONL file with - 560 max words, 75 samples - at ./dataset/gen-word-560-count.jsonl\n", + "Generated a single JSONL file with 527 samples (75 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", + "Generated JSONL file with - 320 max words, 75 samples - at ./dataset/gen-word-320-count.jsonl\n", + "Generated JSONL file with - 690 max words, 75 samples - at 
./dataset/gen-word-690-count.jsonl\n", + "Generated a single JSONL file with 8750 samples (100 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", + "Generated JSONL file with - 830 max words, 75 samples - at ./dataset/gen-word-830-count.jsonl\n", + "Generated a single JSONL file with 6559 samples (100 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", + "Generated JSONL file with - 410 max words, 75 samples - at ./dataset/gen-word-410-count.jsonl\n", + "Generated JSONL file with - 910 max words, 75 samples - at ./dataset/gen-word-910-count.jsonl\n", + "Generated JSONL file with - 650 max words, 75 samples - at ./dataset/gen-word-650-count.jsonl\n", + "Generated JSONL file with - 790 max words, 75 samples - at ./dataset/gen-word-790-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 600 max words - at ./dataset/shuffle-word-600-count.jsonl\n", + "Generated JSONL file with - 1120 max words, 75 samples - at ./dataset/gen-word-1120-count.jsonl\n", + "Generated JSONL file with - 720 max words, 75 samples - at ./dataset/gen-word-720-count.jsonl\n", + "Generated JSONL file with - 820 max words, 75 samples - at ./dataset/gen-word-820-count.jsonl\n", + "Generated a single JSONL file with 540 samples (75 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", + "Generated a single JSONL file with 4388 samples (100 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", + "Generated JSONL file with - 930 max words, 75 samples - at ./dataset/gen-word-930-count.jsonl\n", + "Generated a single JSONL file with 1023 samples (75 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", + "Generated JSONL file with - 600 max words, 75 samples - at ./dataset/gen-word-600-count.jsonl\n", + "Generated JSONL file with - 620 max words, 75 samples - at ./dataset/gen-word-620-count.jsonl\n", + "Generated a single JSONL file with 440 samples (75 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", + "Generated JSONL file with - 920 max words, 75 samples - at ./dataset/gen-word-920-count.jsonl\n", + "Generated JSONL file with - 670 max words, 75 samples - at ./dataset/gen-word-670-count.jsonl\n", + "Generated JSONL file with - 970 max words, 75 samples - at ./dataset/gen-word-970-count.jsonl\n", + "Generated JSONL file with - 990 max words, 75 samples - at ./dataset/gen-word-990-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", + "Generated JSONL file with - 660 max words, 75 samples - at ./dataset/gen-word-660-count.jsonl\n", + "Generated JSONL file with - 800 max words, 75 samples - at ./dataset/gen-word-800-count.jsonl\n", + "Generated JSONL file with - 1070 max words, 75 samples - at ./dataset/gen-word-1070-count.jsonl\n", + "Generated JSONL file with - 980 max words, 75 samples - at ./dataset/gen-word-980-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", + "Generated JSONL file with - 960 max words, 75 samples - at ./dataset/gen-word-960-count.jsonl\n", + "Generated a single JSONL file with 
5882 samples (100 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", + "Generated a single JSONL file with 1013 samples (75 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", + "Generated JSONL file with - 1000 max words, 75 samples - at ./dataset/gen-word-1000-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", + "Generated JSONL file with - 900 max words, 75 samples - at ./dataset/gen-word-900-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 390 max words - at ./dataset/shuffle-word-390-count.jsonl\n", + "Generated a single JSONL file with 13011 samples (100 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonl\n", + "Generated JSONL file with - 890 max words, 75 samples - at ./dataset/gen-word-890-count.jsonl\n", + "Generated a single JSONL file with 370 samples (75 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", + "Generated JSONL file with - 1010 max words, 75 samples - at ./dataset/gen-word-1010-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", + "Generated a single JSONL file with 221 samples (75 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", + "Generated JSONL file with - 1100 max words, 75 samples - at ./dataset/gen-word-1100-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", + "Generated JSONL file with - 1150 max words, 75 samples - at ./dataset/gen-word-1150-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", + "Generated a single JSONL file with 449 samples (75 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", + "Generated JSONL file with - 840 max words, 75 samples - at ./dataset/gen-word-840-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", + "Generated a single JSONL file with 693 samples (75 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonlGenerated a single JSONL file with 443 samples (75 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", + "\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 800 max words - at 
./dataset/shuffle-word-800-count.jsonl\n", + "Generated a single JSONL file with 1097 samples (75 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", + "Generated a single JSONL file with 529 samples (75 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", + "Generated a single JSONL file with 435 samples (75 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", + "Generated JSONL file with - 940 max words, 75 samples - at ./dataset/gen-word-940-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", + "Generated a single JSONL file with 442 samples (75 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", + "Generated JSONL file with - 1050 max words, 75 samples - at ./dataset/gen-word-1050-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", + "Generated a single JSONL file with 17788 samples (100 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", + "Generated a single JSONL file with 234 samples (75 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", + "Generated a single JSONL file with 296 samples (75 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", + "Generated a single JSONL file with 242 samples (75 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", + "Generated JSONL file with - 1090 max words, 75 samples - at ./dataset/gen-word-1090-count.jsonl\n", + "Generated a single JSONL file with 239 samples (75 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", + "Generated JSONL file with - 
1110 max words, 75 samples - at ./dataset/gen-word-1110-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", + "Generated JSONL file with - 850 max words, 75 samples - at ./dataset/gen-word-850-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", + "Generated JSONL file with - 950 max words, 75 samples - at ./dataset/gen-word-950-count.jsonl\n", + "Generated a single JSONL file with 234 samples (75 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", + "Generated JSONL file with - 1080 max words, 75 samples - at ./dataset/gen-word-1080-count.jsonl\n", + "Generated a single JSONL file with 297 samples (75 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", + "Generated JSONL file with - 1140 max words, 75 samples - at ./dataset/gen-word-1140-count.jsonl\n", + "Generated JSONL file with - 1210 max words, 75 samples - at ./dataset/gen-word-1210-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", + "Generated a single JSONL file with 239 samples (75 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", + "Generated JSONL file with - 1160 max words, 75 samples - at ./dataset/gen-word-1160-count.jsonl\n", + "Generated a single JSONL file with 437 samples (75 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", + "Generated JSONL file with - 1190 max words, 75 samples - at ./dataset/gen-word-1190-count.jsonl\n", + "Generated JSONL file with - 1060 max words, 75 samples - at ./dataset/gen-word-1060-count.jsonl\n", + "Generated JSONL file with - 1290 max words, 75 samples - at ./dataset/gen-word-1290-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", + "Generated JSONL file with - 1300 max words, 75 samples - at ./dataset/gen-word-1300-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", + "Generated JSONL file with - 1280 max words, 75 samples - at ./dataset/gen-word-1280-count.jsonl\n", + "Generated JSONL file with - 1040 max words, 75 samples - at ./dataset/gen-word-1040-count.jsonl\n", + "Generated a single JSONL file with 446 samples (75 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", + "Generated a single JSONL file with 237 samples (75 token repeat) - 830 max words - at 
./dataset/shuffle-word-830-count.jsonl\n", + "Generated JSONL file with - 1200 max words, 75 samples - at ./dataset/gen-word-1200-count.jsonl\n", + "Generated JSONL file with - 1820 max words, 75 samples - at ./dataset/gen-word-1820-count.jsonl\n", + "Generated JSONL file with - 1170 max words, 75 samples - at ./dataset/gen-word-1170-count.jsonl\n", + "Generated JSONL file with - 1260 max words, 75 samples - at ./dataset/gen-word-1260-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n", + "Generated JSONL file with - 1330 max words, 75 samples - at ./dataset/gen-word-1330-count.jsonl\n", + "Generated JSONL file with - 1240 max words, 75 samples - at ./dataset/gen-word-1240-count.jsonl\n", + "Generated JSONL file with - 2900 max words, 75 samples - at ./dataset/gen-word-2900-count.jsonl\n", + "Generated JSONL file with - 1020 max words, 75 samples - at ./dataset/gen-word-1020-count.jsonl\n", + "Generated JSONL file with - 1180 max words, 75 samples - at ./dataset/gen-word-1180-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", + "Generated JSONL file with - 1130 max words, 75 samples - at ./dataset/gen-word-1130-count.jsonl\n", + "Generated JSONL file with - 1320 max words, 75 samples - at ./dataset/gen-word-1320-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", + "Generated JSONL file with - 1230 max words, 75 samples - at ./dataset/gen-word-1230-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", + "Generated JSONL file with - 1760 max words, 75 samples - at ./dataset/gen-word-1760-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", + "Generated JSONL file with - 1220 max words, 75 samples - at ./dataset/gen-word-1220-count.jsonl\n", + "Generated JSONL file with - 1270 max words, 75 samples - at ./dataset/gen-word-1270-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", + "Generated JSONL file with - 1250 max words, 75 samples - at ./dataset/gen-word-1250-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", + "Generated a single JSONL file with 26100 samples (100 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n", + "Generated a single JSONL file with 223 samples (75 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", + "Generated JSONL file with - 1360 max words, 75 samples - at ./dataset/gen-word-1360-count.jsonl\n", + "Generated JSONL file with - 1370 max words, 75 samples - at ./dataset/gen-word-1370-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 
token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", + "Generated a single JSONL file with 189 samples (75 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n", + "Generated JSONL file with - 1390 max words, 75 samples - at ./dataset/gen-word-1390-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", + "Generated JSONL file with - 1380 max words, 75 samples - at ./dataset/gen-word-1380-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", + "Generated a single JSONL file with 185 samples (75 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", + "Generated a single JSONL file with 194 samples (75 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", + "Generated a single JSONL file with 185 samples (75 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", + "Generated JSONL file with - 1340 max words, 75 samples - at ./dataset/gen-word-1340-count.jsonl\n", + "Generated a single JSONL file with 193 samples (75 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n", + "Generated JSONL file with - 1560 max words, 75 samples - at ./dataset/gen-word-1560-count.jsonl\n", + "Generated JSONL file with - 1350 max words, 75 samples - at ./dataset/gen-word-1350-count.jsonl\n", + "Generated JSONL file with - 1410 max words, 75 samples - at ./dataset/gen-word-1410-count.jsonl\n", + "Generated a single JSONL file with 187 samples (75 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", + "Generated JSONL file with - 1420 max words, 75 samples - at ./dataset/gen-word-1420-count.jsonl\n", + "Generated a single JSONL file with 188 samples (75 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", + "Generated a single JSONL file with 185 samples (75 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", + "Generated JSONL file with - 1710 max words, 75 samples - at ./dataset/gen-word-1710-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1190 max words - at 
./dataset/shuffle-word-1190-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n", + "Generated JSONL file with - 1440 max words, 75 samples - at ./dataset/gen-word-1440-count.jsonl\n", + "Generated JSONL file with - 1310 max words, 75 samples - at ./dataset/gen-word-1310-count.jsonl\n", + "Generated a single JSONL file with 189 samples (75 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", + "Generated a single JSONL file with 184 samples (75 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", + "Generated JSONL file with - 1430 max words, 75 samples - at ./dataset/gen-word-1430-count.jsonl\n", + "Generated JSONL file with - 1450 max words, 75 samples - at ./dataset/gen-word-1450-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", + "Generated JSONL file with - 1400 max words, 75 samples - at ./dataset/gen-word-1400-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", + "Generated JSONL file with - 1470 max words, 75 samples - at ./dataset/gen-word-1470-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", + "Generated JSONL file with - 1500 max words, 75 samples - at ./dataset/gen-word-1500-count.jsonl\n", + "Generated JSONL file with - 1850 max words, 75 samples - at ./dataset/gen-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", + "Generated JSONL file with - 1490 max words, 75 samples - at ./dataset/gen-word-1490-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", + "Generated JSONL file with - 1480 max words, 75 samples - at ./dataset/gen-word-1480-count.jsonl\n", + "Generated JSONL file with - 1580 max words, 75 samples - at ./dataset/gen-word-1580-count.jsonl\n", + "Generated JSONL file with - 1600 max words, 75 samples - at ./dataset/gen-word-1600-count.jsonl\n", + "Generated JSONL file with - 1550 max words, 75 samples - at ./dataset/gen-word-1550-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", + "Generated JSONL file with - 1530 max words, 75 samples - at ./dataset/gen-word-1530-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", + "Generated JSONL file with - 1510 max words, 75 samples - at ./dataset/gen-word-1510-count.jsonl\n", + "Generated JSONL file with - 1660 max words, 75 samples - at 
./dataset/gen-word-1660-count.jsonl\n", + "Generated JSONL file with - 1540 max words, 75 samples - at ./dataset/gen-word-1540-count.jsonl\n", + "Generated JSONL file with - 1520 max words, 75 samples - at ./dataset/gen-word-1520-count.jsonl\n", + "Generated JSONL file with - 1460 max words, 75 samples - at ./dataset/gen-word-1460-count.jsonl\n", + "Generated JSONL file with - 1570 max words, 75 samples - at ./dataset/gen-word-1570-count.jsonl\n", + "Generated JSONL file with - 1670 max words, 75 samples - at ./dataset/gen-word-1670-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonlGenerated a single JSONL file with 150 samples (75 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", + "\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", + "Generated JSONL file with - 1630 max words, 75 samples - at ./dataset/gen-word-1630-count.jsonl\n", + "Generated JSONL file with - 1650 max words, 75 samples - at ./dataset/gen-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n", + "Generated JSONL file with - 1680 max words, 75 samples - at ./dataset/gen-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", + "Generated JSONL file with - 1700 max words, 75 samples - at ./dataset/gen-word-1700-count.jsonl\n", + "Generated JSONL file with - 3225 max words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n", + "Generated JSONL file with - 1620 max words, 75 samples - at ./dataset/gen-word-1620-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", + "Generated JSONL file with - 1640 max words, 75 samples - at ./dataset/gen-word-1640-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n", + "Generated JSONL file with - 1720 max words, 75 samples - at ./dataset/gen-word-1720-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1550 max words - at ./dataset/shuffle-word-1550-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonl\n", + "Generated a single JSONL 
file with 150 samples (75 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n", + "Generated JSONL file with - 1690 max words, 75 samples - at ./dataset/gen-word-1690-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1650 max words - at ./dataset/shuffle-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n", + "Generated JSONL file with - 1770 max words, 75 samples - at ./dataset/gen-word-1770-count.jsonl\n", + "Generated JSONL file with - 1830 max words, 75 samples - at ./dataset/gen-word-1830-count.jsonl\n", + "Generated JSONL file with - 1730 max words, 75 samples - at ./dataset/gen-word-1730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n", + "Generated JSONL file with - 1800 max words, 75 samples - at ./dataset/gen-word-1800-count.jsonl\n", + "Generated JSONL file with - 1810 max words, 75 samples - at ./dataset/gen-word-1810-count.jsonl\n", + "Generated JSONL file with - 1590 max words, 75 samples - at ./dataset/gen-word-1590-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at ./dataset/shuffle-word-3600-count.jsonl\n", + "Generated JSONL file with - 1840 max words, 75 samples - at ./dataset/gen-word-1840-count.jsonl\n", + "Generated JSONL file with - 1780 max words, 75 samples - at ./dataset/gen-word-1780-count.jsonl\n", + "Generated JSONL file with - 1610 max words, 75 samples - at ./dataset/gen-word-1610-count.jsonl\n", + "Generated JSONL file with - 1790 max words, 75 samples - at ./dataset/gen-word-1790-count.jsonl\n", + "Generated JSONL file with - 1750 max words, 75 samples - at ./dataset/gen-word-1750-count.jsonl\n", + "Generated JSONL file with - 1740 max words, 75 samples - at ./dataset/gen-word-1740-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n", + "Generated JSONL file with - 1900 max words, 75 samples - at ./dataset/gen-word-1900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n", + "Generated JSONL file with - 1860 max words, 75 samples - at ./dataset/gen-word-1860-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2000 max words - at ./dataset/shuffle-word-2000-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n", + "Generated JSONL file with - 2220 max words, 75 samples - at ./dataset/gen-word-2220-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 
token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n", + "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", + "Generated JSONL file with - 1920 max words, 75 samples - at ./dataset/gen-word-1920-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n", + "Generated JSONL file with - 1880 max words, 75 samples - at ./dataset/gen-word-1880-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n", + "Generated JSONL file with - 1910 max words, 75 samples - at ./dataset/gen-word-1910-count.jsonl\n", + "Generated JSONL file with - 1930 max words, 75 samples - at ./dataset/gen-word-1930-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n", + "Generated JSONL file with - 1870 max words, 75 samples - at ./dataset/gen-word-1870-count.jsonl\n", + "Generated JSONL file with - 1890 max words, 75 samples - at ./dataset/gen-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1940 max words - at ./dataset/shuffle-word-1940-count.jsonl\n", + "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", + "Generated JSONL file with - 1990 max words, 75 samples - at ./dataset/gen-word-1990-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5200 max words - at ./dataset/shuffle-word-5200-count.jsonl\n", + "Generated JSONL file with - 2000 max words, 75 samples - at ./dataset/gen-word-2000-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1910 max words - at ./dataset/shuffle-word-1910-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1930 max words - at ./dataset/shuffle-word-1930-count.jsonl\n", + "Generated a single JSONL file with 55753 samples (100 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1870 max words - at ./dataset/shuffle-word-1870-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n", + "Generated JSONL file with - 1970 max words, 75 samples - at ./dataset/gen-word-1970-count.jsonl\n", + "Generated JSONL file with - 2020 max words, 75 samples - at 
./dataset/gen-word-2020-count.jsonl\n", + "Generated JSONL file with - 2040 max words, 75 samples - at ./dataset/gen-word-2040-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonl\n", + "Generated JSONL file with - 2070 max words, 75 samples - at ./dataset/gen-word-2070-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n", + "Generated JSONL file with - 2030 max words, 75 samples - at ./dataset/gen-word-2030-count.jsonl\n", + "Generated JSONL file with - 2090 max words, 75 samples - at ./dataset/gen-word-2090-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n", + "Generated JSONL file with - 1950 max words, 75 samples - at ./dataset/gen-word-1950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n", + "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", + "Generated JSONL file with - 2150 max words, 75 samples - at ./dataset/gen-word-2150-count.jsonl\n", + "Generated JSONL file with - 2390 max words, 75 samples - at ./dataset/gen-word-2390-count.jsonl\n", + "Generated JSONL file with - 2120 max words, 75 samples - at ./dataset/gen-word-2120-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n", + "Generated a single JSONL file with 142 samples (75 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2100 max words - at ./dataset/shuffle-word-2100-count.jsonl\n", + "Generated JSONL file with - 2140 max words, 75 samples - at ./dataset/gen-word-2140-count.jsonl\n", + "Generated a single JSONL file with 84 samples (75 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n", + "Generated a single JSONL file with 147 samples (75 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n", + "Generated JSONL file with - 2200 max words, 75 samples - at ./dataset/gen-word-2200-count.jsonl\n", + "Generated JSONL file with - 2270 max words, 75 samples - at ./dataset/gen-word-2270-count.jsonl\n", + "Generated JSONL file with - 4400 max words, 100 samples - at ./dataset/gen-word-4400-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n", + "Generated a single JSONL file 
with 110 samples (75 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n", + "Generated JSONL file with - 2190 max words, 75 samples - at ./dataset/gen-word-2190-count.jsonl\n", + "Generated JSONL file with - 2480 max words, 75 samples - at ./dataset/gen-word-2480-count.jsonl\n", + "Generated JSONL file with - 2580 max words, 75 samples - at ./dataset/gen-word-2580-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", + "Generated JSONL file with - 2280 max words, 75 samples - at ./dataset/gen-word-2280-count.jsonl\n", + "Generated JSONL file with - 2100 max words, 75 samples - at ./dataset/gen-word-2100-count.jsonl\n", + "Generated JSONL file with - 1980 max words, 75 samples - at ./dataset/gen-word-1980-count.jsonl\n", + "Generated JSONL file with - 2050 max words, 75 samples - at ./dataset/gen-word-2050-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n", + "Generated JSONL file with - 2080 max words, 75 samples - at ./dataset/gen-word-2080-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n", + "Generated JSONL file with - 2010 max words, 75 samples - at ./dataset/gen-word-2010-count.jsonl\n", + "Generated JSONL file with - 2110 max words, 75 samples - at ./dataset/gen-word-2110-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2140 max words - at ./dataset/shuffle-word-2140-count.jsonl\n", + "Generated JSONL file with - 4150 max words, 100 samples - at ./dataset/gen-word-4150-count.jsonl\n", + "Generated JSONL file with - 1960 max words, 75 samples - at ./dataset/gen-word-1960-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2090 max words - at ./dataset/shuffle-word-2090-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n", + "Generated JSONL file with - 2130 max words, 75 samples - at ./dataset/gen-word-2130-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n", + "Generated JSONL file with - 2170 max words, 75 samples - at ./dataset/gen-word-2170-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n", + "Generated JSONL file with - 4275 max words, 100 samples - at ./dataset/gen-word-4275-count.jsonl\n", + "Generated JSONL file with - 2160 max words, 75 samples - at ./dataset/gen-word-2160-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token 
repeat) - 4575 max words - at ./dataset/shuffle-word-4575-count.jsonl\n", + "Generated JSONL file with - 4500 max words, 100 samples - at ./dataset/gen-word-4500-count.jsonl\n", + "Generated JSONL file with - 2180 max words, 75 samples - at ./dataset/gen-word-2180-count.jsonl\n", + "Generated JSONL file with - 2410 max words, 75 samples - at ./dataset/gen-word-2410-count.jsonl\n", + "Generated JSONL file with - 2400 max words, 75 samples - at ./dataset/gen-word-2400-count.jsonl\n", + "Generated JSONL file with - 2560 max words, 75 samples - at ./dataset/gen-word-2560-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n", + "Generated JSONL file with - 4750 max words, 100 samples - at ./dataset/gen-word-4750-count.jsonl\n", + "Generated JSONL file with - 2570 max words, 75 samples - at ./dataset/gen-word-2570-count.jsonl\n", + "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n", + "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n", + "Generated JSONL file with - 2250 max words, 75 samples - at ./dataset/gen-word-2250-count.jsonl\n", + "Generated JSONL file with - 1940 max words, 75 samples - at ./dataset/gen-word-1940-count.jsonl\n", + "Generated JSONL file with - 2260 max words, 75 samples - at ./dataset/gen-word-2260-count.jsonl\n", + "Generated JSONL file with - 2060 max words, 75 samples - at ./dataset/gen-word-2060-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n", + "Generated JSONL file with - 2230 max words, 75 samples - at ./dataset/gen-word-2230-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n", + "Generated JSONL file with - 6000 max words, 100 samples - at ./dataset/gen-word-6000-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n", + "Generated JSONL file with - 2660 max words, 75 samples - at ./dataset/gen-word-2660-count.jsonl\n", + "Generated JSONL file with - 2760 max words, 75 samples - at ./dataset/gen-word-2760-count.jsonl\n", + "Generated JSONL file with - 2720 max words, 75 samples - at ./dataset/gen-word-2720-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n", + "Generated a single JSONL file with 86 samples (75 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n", + "Generated a 
single JSONL file with 150 samples (75 token repeat) - 2280 max words - at ./dataset/shuffle-word-2280-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", + "Generated JSONL file with - 2330 max words, 75 samples - at ./dataset/gen-word-2330-count.jsonl\n", + "Generated JSONL file with - 2350 max words, 75 samples - at ./dataset/gen-word-2350-count.jsonl\n", + "Generated JSONL file with - 2310 max words, 75 samples - at ./dataset/gen-word-2310-count.jsonl\n", + "Generated JSONL file with - 2370 max words, 75 samples - at ./dataset/gen-word-2370-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n", + "Generated JSONL file with - 2380 max words, 75 samples - at ./dataset/gen-word-2380-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n", + "Generated JSONL file with - 2320 max words, 75 samples - at ./dataset/gen-word-2320-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3125 max words - at ./dataset/shuffle-word-3125-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonl\n", + "Generated JSONL file with - 2210 max words, 75 samples - at ./dataset/gen-word-2210-count.jsonl\n", + "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n", + "Generated a single JSONL file with 142 samples (75 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n", + "Generated a single JSONL file with 141 samples (75 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2400 max words - at ./dataset/shuffle-word-2400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2330 max words - at ./dataset/shuffle-word-2330-count.jsonl\n", + "Generated JSONL file with - 2240 max words, 75 samples - at ./dataset/gen-word-2240-count.jsonl\n", + "Generated JSONL file with - 2450 max words, 75 samples - at ./dataset/gen-word-2450-count.jsonl\n", + "Generated JSONL file with - 2290 max words, 75 samples - at ./dataset/gen-word-2290-count.jsonl\n", + "Generated JSONL file with - 2430 max words, 75 samples - at ./dataset/gen-word-2430-count.jsonl\n", + "Generated JSONL file with - 4175 max words, 100 samples - at ./dataset/gen-word-4175-count.jsonl\n", + "Generated JSONL file with - 3575 max words, 100 samples - at ./dataset/gen-word-3575-count.jsonl\n", + "Generated JSONL file with - 2510 max words, 75 samples - at ./dataset/gen-word-2510-count.jsonl\n", + 
"Generated JSONL file with - 2470 max words, 75 samples - at ./dataset/gen-word-2470-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n", + "Generated a single JSONL file with 134 samples (75 token repeat) - 2450 max words - at ./dataset/shuffle-word-2450-count.jsonl\n", + "Generated a single JSONL file with 117 samples (75 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n", + "Generated a single JSONL file with 140 samples (75 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n", + "Generated JSONL file with - 2420 max words, 75 samples - at ./dataset/gen-word-2420-count.jsonl\n", + "Generated JSONL file with - 2440 max words, 75 samples - at ./dataset/gen-word-2440-count.jsonl\n", + "Generated a single JSONL file with 141 samples (75 token repeat) - 2500 max words - at ./dataset/shuffle-word-2500-count.jsonl\n", + "Generated a single JSONL file with 136 samples (75 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n", + "Generated JSONL file with - 2360 max words, 75 samples - at ./dataset/gen-word-2360-count.jsonl\n", + "Generated a single JSONL file with 140 samples (75 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n", + "Generated a single JSONL file with 112 samples (75 token repeat) - 2520 max words - at ./dataset/shuffle-word-2520-count.jsonl\n", + "Generated JSONL file with - 2460 max words, 75 samples - at ./dataset/gen-word-2460-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonl\n", + "Generated a single JSONL file with 132 samples (75 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n", + "Generated JSONL file with - 2340 max words, 75 samples - at ./dataset/gen-word-2340-count.jsonl\n", + "Generated JSONL file with - 2300 max words, 75 samples - at ./dataset/gen-word-2300-count.jsonl\n", + "Generated a single JSONL file with 116 samples (75 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n", + "Generated a single JSONL file with 117 samples (75 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n", + "Generated JSONL file with - 2530 max words, 75 samples - at ./dataset/gen-word-2530-count.jsonl\n", + "Generated JSONL file with - 2520 max words, 75 samples - at ./dataset/gen-word-2520-count.jsonl\n", + "Generated JSONL file with - 2550 max words, 75 samples - at ./dataset/gen-word-2550-count.jsonl\n", + "Generated JSONL file with - 2600 max words, 75 samples - at ./dataset/gen-word-2600-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n", + "Generated a single JSONL file with 111 samples (75 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n", + "Generated JSONL file with - 2610 max words, 75 samples - at ./dataset/gen-word-2610-count.jsonl\n", + "Generated JSONL file with - 2540 max words, 75 samples - at ./dataset/gen-word-2540-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n", + "Generated JSONL file with - 2640 max words, 75 samples - at ./dataset/gen-word-2640-count.jsonl\n", + "Generated JSONL file with - 2590 max words, 75 samples - at ./dataset/gen-word-2590-count.jsonl\n", + 
"Generated JSONL file with - 2650 max words, 75 samples - at ./dataset/gen-word-2650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4075 max words - at ./dataset/shuffle-word-4075-count.jsonl\n", + "Generated JSONL file with - 2710 max words, 75 samples - at ./dataset/gen-word-2710-count.jsonl\n", + "Generated JSONL file with - 2670 max words, 75 samples - at ./dataset/gen-word-2670-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4550 max words - at ./dataset/shuffle-word-4550-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n", + "Generated JSONL file with - 2690 max words, 75 samples - at ./dataset/gen-word-2690-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n", + "Generated a single JSONL file with 87 samples (75 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n", + "Generated a single JSONL file with 86 samples (75 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n", + "Generated JSONL file with - 2620 max words, 75 samples - at ./dataset/gen-word-2620-count.jsonl\n", + "Generated a single JSONL file with 88 samples (75 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n", + "Generated JSONL file with - 2630 max words, 75 samples - at ./dataset/gen-word-2630-count.jsonl\n", + "Generated a single JSONL file with 113 samples (75 token repeat) - 2600 max words - at ./dataset/shuffle-word-2600-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n", + "Generated JSONL file with - 2740 max words, 75 samples - at ./dataset/gen-word-2740-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5350 max words - at ./dataset/shuffle-word-5350-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n", + "Generated JSONL file with - 2680 max words, 75 samples - at ./dataset/gen-word-2680-count.jsonl\n", + "Generated a single JSONL file with 111 samples (75 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n", + "Generated a single JSONL file with 114 samples (75 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n", + "Generated JSONL file with - 2750 max words, 75 samples - at ./dataset/gen-word-2750-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n", + "Generated JSONL file with - 2730 max words, 75 samples - at ./dataset/gen-word-2730-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2770 max words - at 
./dataset/shuffle-word-2770-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5625 max words - at ./dataset/shuffle-word-5625-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n", + "Generated JSONL file with - 2780 max words, 75 samples - at ./dataset/gen-word-2780-count.jsonl\n", + "Generated JSONL file with - 4250 max words, 100 samples - at ./dataset/gen-word-4250-count.jsonl\n", + "Generated JSONL file with - 2790 max words, 75 samples - at ./dataset/gen-word-2790-count.jsonl\n", + "Generated JSONL file with - 4075 max words, 100 samples - at ./dataset/gen-word-4075-count.jsonl\n", + "Generated JSONL file with - 4025 max words, 100 samples - at ./dataset/gen-word-4025-count.jsonl\n", + "Generated JSONL file with - 2800 max words, 75 samples - at ./dataset/gen-word-2800-count.jsonl\n", + "Generated JSONL file with - 4225 max words, 100 samples - at ./dataset/gen-word-4225-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n", + "Generated JSONL file with - 4375 max words, 100 samples - at ./dataset/gen-word-4375-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n", + "Generated JSONL file with - 2830 max words, 75 samples - at ./dataset/gen-word-2830-count.jsonl\n", + "Generated JSONL file with - 2810 max words, 75 samples - at ./dataset/gen-word-2810-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n", + "Generated JSONL file with - 4300 max words, 100 samples - at ./dataset/gen-word-4300-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", + "Generated JSONL file with - 2840 max words, 75 samples - at ./dataset/gen-word-2840-count.jsonl\n", + "Generated JSONL file with - 2500 max words, 75 samples - at ./dataset/gen-word-2500-count.jsonl\n", + "Generated JSONL file with - 4475 max words, 100 samples - at ./dataset/gen-word-4475-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n", + "Generated JSONL file with - 2850 max words, 75 samples - at ./dataset/gen-word-2850-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n", + "Generated JSONL file with - 2490 max words, 75 samples - at ./dataset/gen-word-2490-count.jsonl\n", + "Generated JSONL file with - 2820 max words, 75 samples - at ./dataset/gen-word-2820-count.jsonl\n", + "Generated JSONL file with - 2860 max words, 75 samples - at ./dataset/gen-word-2860-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n", + "Generated JSONL file with - 2770 max words, 75 samples - at ./dataset/gen-word-2770-count.jsonl\n", + "Generated a single JSONL file with 116 samples (75 token repeat) - 2560 max words - at ./dataset/shuffle-word-2560-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n", + "Generated JSONL file with - 3200 max words, 100 samples - at 
./dataset/gen-word-3200-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2890 max words - at ./dataset/shuffle-word-2890-count.jsonl\n", + "Generated JSONL file with - 4675 max words, 100 samples - at ./dataset/gen-word-4675-count.jsonl\n", + "Generated JSONL file with - 2700 max words, 75 samples - at ./dataset/gen-word-2700-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", + "Generated JSONL file with - 4825 max words, 100 samples - at ./dataset/gen-word-4825-count.jsonl\n", + "Generated JSONL file with - 2920 max words, 75 samples - at ./dataset/gen-word-2920-count.jsonl\n", + "Generated JSONL file with - 2870 max words, 75 samples - at ./dataset/gen-word-2870-count.jsonl\n", + "Generated JSONL file with - 2940 max words, 75 samples - at ./dataset/gen-word-2940-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n", + "Generated JSONL file with - 2880 max words, 75 samples - at ./dataset/gen-word-2880-count.jsonl\n", + "Generated JSONL file with - 2890 max words, 75 samples - at ./dataset/gen-word-2890-count.jsonl\n", + "Generated JSONL file with - 2950 max words, 75 samples - at ./dataset/gen-word-2950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n", + "Generated JSONL file with - 3000 max words, 75 samples - at ./dataset/gen-word-3000-count.jsonl\n", + "Generated JSONL file with - 3125 max words, 100 samples - at ./dataset/gen-word-3125-count.jsonl\n", + "Generated JSONL file with - 2990 max words, 75 samples - at ./dataset/gen-word-2990-count.jsonl\n", + "Generated JSONL file with - 2910 max words, 75 samples - at ./dataset/gen-word-2910-count.jsonl\n", + "Generated JSONL file with - 2970 max words, 75 samples - at ./dataset/gen-word-2970-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n", + "Generated JSONL file with - 2980 max words, 75 samples - at ./dataset/gen-word-2980-count.jsonl\n", + "Generated JSONL file with - 3075 max words, 100 samples - at 
./dataset/gen-word-3075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4100 max words - at ./dataset/shuffle-word-4100-count.jsonl\n", + "Generated JSONL file with - 2960 max words, 75 samples - at ./dataset/gen-word-2960-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", + "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n", + "Generated JSONL file with - 2930 max words, 75 samples - at ./dataset/gen-word-2930-count.jsonl\n", + "Generated JSONL file with - 5475 max words, 100 samples - at ./dataset/gen-word-5475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", + "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", + "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n", + "Generated JSONL file with - 4200 max words, 100 samples - at ./dataset/gen-word-4200-count.jsonl\n", + "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", + "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 max words - at ./dataset/shuffle-word-3350-count.jsonl\n", + "Generated JSONL file with - 3100 max words, 100 samples - at ./dataset/gen-word-3100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n", + "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max words - at ./dataset/shuffle-word-3475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words - at ./dataset/shuffle-word-3450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at ./dataset/shuffle-word-3650-count.jsonl\n", + "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n", + "Generated JSONL file with - 3475 max words, 100 samples - at ./dataset/gen-word-3475-count.jsonl\n", + 
"Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3625 max words - at ./dataset/shuffle-word-3625-count.jsonl\n", + "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at ./dataset/shuffle-word-3675-count.jsonl\n", + "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4225 max words - at ./dataset/shuffle-word-4225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4050 max words - at ./dataset/shuffle-word-4050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n", + "Generated JSONL file with - 4100 max words, 100 samples - at ./dataset/gen-word-4100-count.jsonl\n", + "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n", + "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4300 max words - at ./dataset/shuffle-word-4300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4200 max words - at ./dataset/shuffle-word-4200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3800 max words - at ./dataset/shuffle-word-3800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", + "Generated JSONL file with - 3650 max words, 100 samples - at ./dataset/gen-word-3650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4125 max words - at ./dataset/shuffle-word-4125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4025 max words - at ./dataset/shuffle-word-4025-count.jsonl\n", + "Generated JSONL file with - 3600 max words, 100 samples - at ./dataset/gen-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4400 max words - at ./dataset/shuffle-word-4400-count.jsonl\n", + "Generated JSONL file with - 4125 max words, 100 samples - at ./dataset/gen-word-4125-count.jsonl\n", + "Generated JSONL file with - 3675 max words, 100 
samples - at ./dataset/gen-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", + "Generated JSONL file with - 3875 max words, 100 samples - at ./dataset/gen-word-3875-count.jsonl\n", + "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonl\n", + "Generated JSONL file with - 3775 max words, 100 samples - at ./dataset/gen-word-3775-count.jsonl\n", + "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n", + "Generated JSONL file with - 3725 max words, 100 samples - at ./dataset/gen-word-3725-count.jsonl\n", + "Generated JSONL file with - 4350 max words, 100 samples - at ./dataset/gen-word-4350-count.jsonl\n", + "Generated JSONL file with - 4050 max words, 100 samples - at ./dataset/gen-word-4050-count.jsonl\n", + "Generated JSONL file with - 3900 max words, 100 samples - at ./dataset/gen-word-3900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4325 max words - at ./dataset/shuffle-word-4325-count.jsonl\n", + "Generated JSONL file with - 3850 max words, 100 samples - at ./dataset/gen-word-3850-count.jsonl\n", + "Generated JSONL file with - 3925 max words, 100 samples - at ./dataset/gen-word-3925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4375 max words - at ./dataset/shuffle-word-4375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4925 max words - at ./dataset/shuffle-word-4925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4175 max words - at ./dataset/shuffle-word-4175-count.jsonl\n", + "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4825 max words - at ./dataset/shuffle-word-4825-count.jsonl\n", + "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3850 max words - at ./dataset/shuffle-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5125 max words - at ./dataset/shuffle-word-5125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4350 max words - at ./dataset/shuffle-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5175 max words - at ./dataset/shuffle-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4700 max words - at ./dataset/shuffle-word-4700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4450 max words - at ./dataset/shuffle-word-4450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4250 max words - at ./dataset/shuffle-word-4250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4425 max words - at ./dataset/shuffle-word-4425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4150 max words - at ./dataset/shuffle-word-4150-count.jsonl\n", + "Generated JSONL file with - 3975 max words, 100 samples - at 
./dataset/gen-word-3975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4275 max words - at ./dataset/shuffle-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5425 max words - at ./dataset/shuffle-word-5425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4850 max words - at ./dataset/shuffle-word-4850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4975 max words - at ./dataset/shuffle-word-4975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5525 max words - at ./dataset/shuffle-word-5525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4475 max words - at ./dataset/shuffle-word-4475-count.jsonl\n", + "Generated JSONL file with - 5350 max words, 100 samples - at ./dataset/gen-word-5350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4500 max words - at ./dataset/shuffle-word-4500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4525 max words - at ./dataset/shuffle-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4675 max words - at ./dataset/shuffle-word-4675-count.jsonl\n", + "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4625 max words - at ./dataset/shuffle-word-4625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4725 max words - at ./dataset/shuffle-word-4725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5925 max words - at ./dataset/shuffle-word-5925-count.jsonl\n", + "Generated JSONL file with - 4325 max words, 100 samples - at ./dataset/gen-word-4325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4600 max words - at ./dataset/shuffle-word-4600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4775 max words - at ./dataset/shuffle-word-4775-count.jsonl\n", + "Generated JSONL file with - 4600 max words, 100 samples - at ./dataset/gen-word-4600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4650 max words - at ./dataset/shuffle-word-4650-count.jsonl\n", + "Generated JSONL file with - 4450 max words, 100 samples - at ./dataset/gen-word-4450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5075 max words - at ./dataset/shuffle-word-5075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4750 max words - at ./dataset/shuffle-word-4750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4900 max words - at ./dataset/shuffle-word-4900-count.jsonl\n", + "Generated JSONL file with - 4550 max words, 100 samples - at ./dataset/gen-word-4550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5000 max words - at ./dataset/shuffle-word-5000-count.jsonl\n", + "Generated JSONL file with - 4575 max words, 100 samples - at ./dataset/gen-word-4575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4875 max words 
- at ./dataset/shuffle-word-4875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4950 max words - at ./dataset/shuffle-word-4950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4800 max words - at ./dataset/shuffle-word-4800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5150 max words - at ./dataset/shuffle-word-5150-count.jsonl\n", + "Generated JSONL file with - 4650 max words, 100 samples - at ./dataset/gen-word-4650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5050 max words - at ./dataset/shuffle-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5225 max words - at ./dataset/shuffle-word-5225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5250 max words - at ./dataset/shuffle-word-5250-count.jsonl\n", + "Generated JSONL file with - 5325 max words, 100 samples - at ./dataset/gen-word-5325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5300 max words - at ./dataset/shuffle-word-5300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5550 max words - at ./dataset/shuffle-word-5550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5275 max words - at ./dataset/shuffle-word-5275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5400 max words - at ./dataset/shuffle-word-5400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5325 max words - at ./dataset/shuffle-word-5325-count.jsonl\n", + "Generated JSONL file with - 4425 max words, 100 samples - at ./dataset/gen-word-4425-count.jsonl\n", + "Generated JSONL file with - 4775 max words, 100 samples - at ./dataset/gen-word-4775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5375 max words - at ./dataset/shuffle-word-5375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5475 max words - at ./dataset/shuffle-word-5475-count.jsonl\n", + "Generated JSONL file with - 5425 max words, 100 samples - at ./dataset/gen-word-5425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5575 max words - at ./dataset/shuffle-word-5575-count.jsonl\n", + "Generated JSONL file with - 4800 max words, 100 samples - at ./dataset/gen-word-4800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5825 max words - at ./dataset/shuffle-word-5825-count.jsonl\n", + "Generated JSONL file with - 4525 max words, 100 samples - at ./dataset/gen-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5850 max words - at ./dataset/shuffle-word-5850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5750 max words - at ./dataset/shuffle-word-5750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5700 max words - at ./dataset/shuffle-word-5700-count.jsonl\n", + "Generated JSONL file with - 4625 max words, 100 samples - at ./dataset/gen-word-4625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5450 max words - at ./dataset/shuffle-word-5450-count.jsonl\n", + "Generated JSONL file with - 4850 max words, 100 samples - at ./dataset/gen-word-4850-count.jsonl\n", + 
"Generated JSONL file with - 4875 max words, 100 samples - at ./dataset/gen-word-4875-count.jsonl\n", + "Generated JSONL file with - 4700 max words, 100 samples - at ./dataset/gen-word-4700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5650 max words - at ./dataset/shuffle-word-5650-count.jsonl\n", + "Generated JSONL file with - 4900 max words, 100 samples - at ./dataset/gen-word-4900-count.jsonl\n", + "Generated JSONL file with - 4725 max words, 100 samples - at ./dataset/gen-word-4725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5725 max words - at ./dataset/shuffle-word-5725-count.jsonl\n", + "Generated JSONL file with - 4975 max words, 100 samples - at ./dataset/gen-word-4975-count.jsonl\n", + "Generated JSONL file with - 4925 max words, 100 samples - at ./dataset/gen-word-4925-count.jsonl\n", + "Generated JSONL file with - 5250 max words, 100 samples - at ./dataset/gen-word-5250-count.jsonl\n", + "Generated JSONL file with - 5275 max words, 100 samples - at ./dataset/gen-word-5275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5775 max words - at ./dataset/shuffle-word-5775-count.jsonl\n", + "Generated JSONL file with - 5900 max words, 100 samples - at ./dataset/gen-word-5900-count.jsonl\n", + "Generated JSONL file with - 4950 max words, 100 samples - at ./dataset/gen-word-4950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5875 max words - at ./dataset/shuffle-word-5875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5900 max words - at ./dataset/shuffle-word-5900-count.jsonl\n", + "Generated JSONL file with - 5650 max words, 100 samples - at ./dataset/gen-word-5650-count.jsonl\n", + "Generated JSONL file with - 5000 max words, 100 samples - at ./dataset/gen-word-5000-count.jsonl\n", + "Generated JSONL file with - 5025 max words, 100 samples - at ./dataset/gen-word-5025-count.jsonl\n", + "Generated JSONL file with - 5150 max words, 100 samples - at ./dataset/gen-word-5150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5600 max words - at ./dataset/shuffle-word-5600-count.jsonl\n", + "Generated JSONL file with - 5225 max words, 100 samples - at ./dataset/gen-word-5225-count.jsonl\n", + "Generated JSONL file with - 5200 max words, 100 samples - at ./dataset/gen-word-5200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5800 max words - at ./dataset/shuffle-word-5800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5675 max words - at ./dataset/shuffle-word-5675-count.jsonl\n", + "Generated JSONL file with - 5100 max words, 100 samples - at ./dataset/gen-word-5100-count.jsonl\n", + "Generated JSONL file with - 5125 max words, 100 samples - at ./dataset/gen-word-5125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5500 max words - at ./dataset/shuffle-word-5500-count.jsonl\n", + "Generated JSONL file with - 5175 max words, 100 samples - at ./dataset/gen-word-5175-count.jsonl\n", + "Generated JSONL file with - 5050 max words, 100 samples - at ./dataset/gen-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5100 max words - at ./dataset/shuffle-word-5100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5025 max words - at 
./dataset/shuffle-word-5025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 6000 max words - at ./dataset/shuffle-word-6000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5975 max words - at ./dataset/shuffle-word-5975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5950 max words - at ./dataset/shuffle-word-5950-count.jsonl\n", + "Generated JSONL file with - 5300 max words, 100 samples - at ./dataset/gen-word-5300-count.jsonl\n", + "Generated JSONL file with - 5450 max words, 100 samples - at ./dataset/gen-word-5450-count.jsonl\n", + "Generated JSONL file with - 5400 max words, 100 samples - at ./dataset/gen-word-5400-count.jsonl\n", + "Generated JSONL file with - 5500 max words, 100 samples - at ./dataset/gen-word-5500-count.jsonl\n", + "Generated JSONL file with - 5525 max words, 100 samples - at ./dataset/gen-word-5525-count.jsonl\n", + "Generated JSONL file with - 5775 max words, 100 samples - at ./dataset/gen-word-5775-count.jsonl\n", + "Generated JSONL file with - 5825 max words, 100 samples - at ./dataset/gen-word-5825-count.jsonl\n", + "Generated JSONL file with - 5550 max words, 100 samples - at ./dataset/gen-word-5550-count.jsonl\n", + "Generated JSONL file with - 5850 max words, 100 samples - at ./dataset/gen-word-5850-count.jsonl\n", + "Generated JSONL file with - 5375 max words, 100 samples - at ./dataset/gen-word-5375-count.jsonl\n", + "Generated JSONL file with - 5750 max words, 100 samples - at ./dataset/gen-word-5750-count.jsonl\n", + "Generated JSONL file with - 5725 max words, 100 samples - at ./dataset/gen-word-5725-count.jsonl\n", + "Generated JSONL file with - 5800 max words, 100 samples - at ./dataset/gen-word-5800-count.jsonl\n", + "Generated JSONL file with - 5075 max words, 100 samples - at ./dataset/gen-word-5075-count.jsonl\n", + "Generated JSONL file with - 5625 max words, 100 samples - at ./dataset/gen-word-5625-count.jsonl\n", + "Generated JSONL file with - 5700 max words, 100 samples - at ./dataset/gen-word-5700-count.jsonl\n", + "Generated JSONL file with - 5875 max words, 100 samples - at ./dataset/gen-word-5875-count.jsonl\n", + "Generated JSONL file with - 5675 max words, 100 samples - at ./dataset/gen-word-5675-count.jsonl\n", + "Generated JSONL file with - 5975 max words, 100 samples - at ./dataset/gen-word-5975-count.jsonl\n", + "Generated JSONL file with - 5600 max words, 100 samples - at ./dataset/gen-word-5600-count.jsonl\n", + "Generated JSONL file with - 5575 max words, 100 samples - at ./dataset/gen-word-5575-count.jsonl\n", + "Generated JSONL file with - 5925 max words, 100 samples - at ./dataset/gen-word-5925-count.jsonl\n", + "Generated JSONL file with - 5950 max words, 100 samples - at ./dataset/gen-word-5950-count.jsonl\n", + "## Done ##\n", + "total 1.8G\n", + "drwxr-xr-x 2 root root 40K Jan 23 22:18 .\n", + "drwxr-xr-x 5 root root 4.0K Jan 23 22:16 ..\n", + "-rw-r--r-- 1 root root 20K Jan 23 22:18 gen-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 106K Jan 23 22:18 gen-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 736K Jan 23 22:18 gen-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 738K Jan 23 22:18 gen-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 752K Jan 23 22:18 gen-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 750K Jan 23 22:18 gen-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 759K Jan 23 22:18 gen-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 778K Jan 23 22:18 
gen-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 779K Jan 23 22:18 gen-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 786K Jan 23 22:18 gen-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 790K Jan 23 22:18 gen-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 798K Jan 23 22:18 gen-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 86K Jan 23 22:18 gen-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 805K Jan 23 22:18 gen-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 814K Jan 23 22:18 gen-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 822K Jan 23 22:18 gen-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 841K Jan 23 22:18 gen-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 838K Jan 23 22:18 gen-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 841K Jan 23 22:18 gen-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 848K Jan 23 22:18 gen-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 859K Jan 23 22:18 gen-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 865K Jan 23 22:18 gen-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 872K Jan 23 22:18 gen-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 95K Jan 23 22:18 gen-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 879K Jan 23 22:18 gen-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 893K Jan 23 22:18 gen-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 904K Jan 23 22:18 gen-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 893K Jan 23 22:18 gen-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 912K Jan 23 22:18 gen-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 918K Jan 23 22:18 gen-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 935K Jan 23 22:18 gen-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 938K Jan 23 22:18 gen-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 946K Jan 23 22:18 gen-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 952K Jan 23 22:18 gen-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 103K Jan 23 22:18 gen-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 956K Jan 23 22:18 gen-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 965K Jan 23 22:18 gen-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 968K Jan 23 22:18 gen-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 964K Jan 23 22:18 gen-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 980K Jan 23 22:18 gen-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 990K Jan 23 22:18 gen-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 990K Jan 23 22:18 gen-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 1005K Jan 23 22:18 gen-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1017K Jan 23 22:18 gen-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1017K Jan 23 22:18 gen-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 110K Jan 23 22:18 gen-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 
gen-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 25K Jan 23 22:18 gen-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 115K Jan 23 22:18 gen-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 22:18 gen-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 125K Jan 23 22:18 gen-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 22:18 gen-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 133K Jan 23 22:18 gen-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 145K Jan 23 22:18 gen-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 22:18 gen-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 143K Jan 23 22:18 gen-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 
gen-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 22:18 gen-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 30K Jan 23 22:18 gen-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 153K Jan 23 22:18 gen-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 159K Jan 23 22:18 gen-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 22:18 gen-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 170K Jan 23 22:18 gen-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 22:18 gen-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 180K Jan 23 22:18 gen-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 
gen-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 22:18 gen-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 184K Jan 23 22:18 gen-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 34K Jan 23 22:18 gen-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 191K Jan 23 22:18 gen-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 22:18 gen-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 199K Jan 23 22:18 gen-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 gen-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 206K Jan 23 22:18 gen-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 
gen-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 209K Jan 23 22:18 gen-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 gen-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 220K Jan 23 22:18 gen-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 gen-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 39K Jan 23 22:18 gen-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 221K Jan 23 22:18 gen-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 22:18 gen-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 gen-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 gen-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 gen-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 236K Jan 23 22:18 gen-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 gen-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 gen-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 240K Jan 23 22:18 gen-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 gen-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 252K Jan 23 22:18 gen-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 22:18 gen-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 22:18 gen-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 gen-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 gen-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 258K Jan 23 22:18 gen-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 gen-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 
gen-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 gen-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 43K Jan 23 22:18 gen-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 267K Jan 23 22:18 gen-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 gen-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 269K Jan 23 22:18 gen-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 22:18 gen-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 22:18 gen-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 22:18 gen-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 22:18 gen-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 277K Jan 23 22:18 gen-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 22:18 gen-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 22:18 gen-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 22:18 gen-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 285K Jan 23 22:18 gen-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 22:18 gen-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 296K Jan 23 22:18 gen-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 gen-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 gen-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 gen-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 gen-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 49K Jan 23 22:18 gen-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 298K Jan 23 22:18 gen-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 308K Jan 23 22:18 gen-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 22:18 gen-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 22:18 gen-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 22:18 gen-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 22:18 gen-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 314K Jan 23 22:18 gen-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 22:18 gen-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 22:18 gen-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 22:18 gen-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 22:18 gen-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 319K Jan 23 22:18 gen-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 22:18 gen-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 22:18 gen-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 22:18 
gen-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 22:18 gen-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 328K Jan 23 22:18 gen-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 55K Jan 23 22:18 gen-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 336K Jan 23 22:18 gen-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 22:18 gen-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 341K Jan 23 22:18 gen-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 22:18 gen-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 22:18 gen-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 22:18 gen-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 351K Jan 23 22:18 gen-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 22:18 gen-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 358K Jan 23 22:18 gen-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 22:18 gen-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 22:18 gen-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 22:18 gen-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 362K Jan 23 22:18 gen-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 22:18 gen-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 22:18 gen-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 22:18 gen-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 22:18 gen-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 22:18 gen-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 58K Jan 23 22:18 gen-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 370K Jan 23 22:18 gen-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 22:18 gen-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 22:18 gen-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 22:18 gen-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 22:18 gen-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 388K Jan 23 22:18 gen-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 22:18 gen-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 22:18 gen-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 gen-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 gen-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 386K Jan 23 22:18 gen-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 gen-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 gen-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 
gen-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 gen-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 386K Jan 23 22:18 gen-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 gen-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 gen-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 22:18 gen-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 22:18 gen-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 404K Jan 23 22:18 gen-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 22:18 gen-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 22:18 gen-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 22:18 gen-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 22:18 gen-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 63K Jan 23 22:18 gen-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 411K Jan 23 22:18 gen-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 22:18 gen-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 22:18 gen-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 22:18 gen-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 415K Jan 23 22:18 gen-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 22:18 gen-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 421K Jan 23 22:18 gen-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 22:18 gen-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 22:18 gen-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 22:18 gen-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 22:18 gen-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 435K Jan 23 22:18 gen-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 431K Jan 23 22:18 gen-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 22:18 gen-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 22:18 gen-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 22:18 gen-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 22:18 gen-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 71K Jan 23 22:18 gen-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 444K Jan 23 22:18 gen-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.8M Jan 23 22:18 gen-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 458K Jan 23 22:18 gen-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 468K Jan 23 22:18 gen-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 466K Jan 23 22:18 gen-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 471K Jan 23 22:18 gen-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 76K Jan 23 22:18 gen-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 479K Jan 23 22:18 gen-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 486K Jan 23 22:18 gen-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 495K Jan 23 22:18 
gen-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 508K Jan 23 22:18 gen-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 499K Jan 23 22:18 gen-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 80K Jan 23 22:18 gen-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 521K Jan 23 22:18 gen-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 529K Jan 23 22:18 gen-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 533K Jan 23 22:18 gen-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 543K Jan 23 22:18 gen-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 548K Jan 23 22:18 gen-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 84K Jan 23 22:18 gen-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 553K Jan 23 22:18 gen-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 561K Jan 23 22:18 gen-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 568K Jan 23 22:18 gen-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 577K Jan 23 22:18 gen-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 583K Jan 23 22:18 gen-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 88K Jan 23 22:18 gen-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 595K Jan 23 22:18 gen-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 599K Jan 23 22:18 gen-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 612K Jan 23 22:18 gen-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 610K Jan 23 22:18 gen-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 618K Jan 23 22:18 gen-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 93K Jan 23 22:18 gen-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 623K Jan 23 22:18 gen-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 639K Jan 23 22:18 gen-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 639K Jan 23 22:18 gen-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 642K Jan 23 22:18 gen-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 668K Jan 23 22:18 gen-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 98K Jan 23 22:18 gen-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 662K Jan 23 22:18 gen-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 667K Jan 23 22:18 gen-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 672K Jan 23 22:18 gen-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 677K Jan 23 22:18 gen-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 700K Jan 23 22:18 gen-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 101K Jan 23 22:18 gen-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 695K Jan 23 22:18 gen-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 707K Jan 23 22:18 gen-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 719K Jan 23 22:18 gen-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 719K Jan 23 22:18 gen-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 733K Jan 23 22:18 gen-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 22:18 shuffle-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 22:18 shuffle-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 
shuffle-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 shuffle-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 shuffle-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 22:18 shuffle-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1480-count.jsonl\n", + "-rw-r--r-- 
1 root root 2.0M Jan 23 22:18 shuffle-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 22:18 shuffle-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 
shuffle-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 22:18 shuffle-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2290-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.0M Jan 23 22:18 shuffle-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 22:18 shuffle-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 
shuffle-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 22:18 shuffle-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 22:18 shuffle-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3200-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.5M Jan 23 22:18 shuffle-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 22:18 shuffle-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 shuffle-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 
shuffle-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 shuffle-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root 
root 7.9M Jan 23 22:18 shuffle-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 22:18 shuffle-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 shuffle-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5825-count.jsonl\n", + 
"-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 22:18 shuffle-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 22:18 shuffle-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 shuffle-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 shuffle-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 shuffle-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 22:18 shuffle-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 22:18 shuffle-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 22:18 shuffle-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-900-count.jsonl\n", + "-rw-r--r-- 
1 root root 2.0M Jan 23 22:18 shuffle-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 22:18 shuffle-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 22:18 shuffle-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 12K Jan 23 22:18 word-2-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 22:18 word-4-count.jsonl\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# Training set for <= 100 words\n", + "# This is used to fill up as much blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100+ - 3000 words dataset\n", + "# \n", + "for i in {110..3000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 75 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 75 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 3000+ - 400 words dataset\n", + "# \n", + "for i in {3025..6000..25} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "da287711", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resolving data files: 100%|███████████████| 862/862 [00:00<00:00, 107543.06it/s]\n", + "Saving the dataset (4/4 shards): 100%|█| 36851/36851 [00:01<00:00, 19869.65 exam\n", + "Saving the dataset (1/1 shards): 100%|█| 1547/1547 [00:00<00:00, 30397.64 exampl\n" + ] + } + ], + "source": [ + "# Lets pre tokenize the requried dataset\n", + "# and pack the data into 8k of length\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# Then to do 8k chunks, batchsize 8, with 16k datapacks. Why? 
I dun know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "07d1bf84", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : The actual tune!" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "03c6af10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-23 22:32:27,860] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-7B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-7B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.devices=auto', '--trainer.microbatch_size=4', '--model.ctx_len=8192'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-7B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-7B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.devices=auto', '--trainer.microbatch_size=4', '--model.ctx_len=8192'].\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 1430867974\n", + "Seed set to 1430867974\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. 
Please set your precision to bf16-mixed instead!\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 256\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 4\n", + " - accumulate_grad_batches: 8\n", + " - effective_batch_size: 256\n", + "\n", + "[rank: 0] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-23 22:33:36,801] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 22:33:36,802] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 22:33:36,803] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 3] Seed set to 1430867974\n", + "[rank: 5] Seed set to 1430867974\n", + "[rank: 2] Seed set to 1430867974\n", + "[rank: 6] Seed set to 1430867974\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 7] Seed set to 1430867974\n", + "[rank: 1] Seed set to 1430867974\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 4] Seed set to 1430867974\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 2] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 5] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 1] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 3] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 4] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 7] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 6] Seed set to 1430867974\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240123_223508-bdvkilfd\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8xA100] RWKV-v5-7B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_2)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/bdvkilfd\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 2.000e-04 (0.0002)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.016431331634521484 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10186290740966797 seconds\n", + "Time to load fused_adam op: 0.10216116905212402 seconds\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10233497619628906 seconds\n", + "Time to load fused_adam op: 0.10216617584228516 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10257244110107422 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.1023705005645752 seconds\n", + "Time to load fused_adam op: 0.10263657569885254 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 268 M \n", + "1 | blocks | ModuleList | 7.0 B \n", + "2 | ln_out | LayerNorm | 8.2 K \n", + "3 | head | Linear | 268 M \n", + "--------------------------------------\n", + "7.5 B Trainable params\n", + "0 Non-trainable params\n", + "7.5 B Total params\n", + "30,072.177Total estimated model params size (MB)\n", + "Epoch 0: 17%|▏| 200/1152 [43:43<3:28:08, 0.08it/s, v_num=ilfd, train/loss=0.00/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|█| 1152/1152 [3:52:43<00:00, 0.08it/s, v_num=ilfd, train/loss=0.7\n", + "Validation: | | 0/? 
[00:00 [verbose/csv-file-path] [from_token_count] [to_token_count]") + sys.exit(1) + + # Verbose mode + verbose = False + csv_file_path = None + if len(sys.argv) >= 3: + if sys.argv[2] == "verbose": + verbose = True + elif sys.argv[2] == "none": + csv_file_path = None + else: + csv_file_path = sys.argv[2] + + from src.model import SimpleRWKV + model_path = sys.argv[1] + model = SimpleRWKV(model_path, device="cuda") + + # The evaluation size range + MAX_TOKENS = 1000 + + # Get the cursed " on" token (this happens only in some models) + on_token = model.encode(" on")[0] + markdown_token = model.encode("```")[0] + newline_token = model.encode("\n")[0] + + # Pipeline args to use + token_ban = [on_token] # ban the generation of some tokens + + # Read the test word list, taken from ./eval_word_list.txt + with open(os.path.join(SCRIPT_DIR,'./eval_word_list.txt'), 'r') as f: + test_word_list = f.read() + + # Open the CSV file, to write into + if csv_file_path != None: + # Ensure parent dir is in place + csv_file_dir = os.path.dirname(csv_file_path) + if not os.path.exists(csv_file_dir): + os.makedirs(csv_file_dir) + + # Open the CSV file + csv_file_handle = await aiofiles.open(csv_file_path, 'w', encoding="utf-8", newline="") + csv_writer = AsyncWriter(csv_file_handle, dialect="unix") + + # Write the header + await csv_writer.writerow([ + 'eval_token_count', 'token_idx', 'matched', + 'top_token_str', 'top_token_percentage', + 'eval_token_str', 'eval_token_pos', 'eval_token_percentage', + 'is_random_baseline' + ]) + else: + csv_writer = None + + # Convert it to tokens + test_word_tokens = model.encode(test_word_list) + + # Prompt template prefix to use + prompt_prefix = "Instruction: Repeat this text exactly as it is\n\nInput:\n```\n" + prompt_suffix = "\n```\n\n" + reply_prefix = "Response:\n```\n" + reply_suffix = "\n```\n" + + # Process the prompt prefix + prompt_prefix_logits, prompt_prefix_state = model.forward(model.encode(prompt_prefix), None) + mid_segment_tokens = model.encode(prompt_suffix+reply_prefix) + + # Function use to get words with the following token count + def get_words_tokens_with_token_count(token_count): + target_tokens = test_word_tokens[:token_count] + target_words = model.decode(target_tokens) + + # Normalize to lowercase + target_words = target_words.lower() + return target_words + + # Function for validating once the model at a specific token count + async def validate_model(token_count, withoutInstructAndInput=False): + # Start the performance timer + start_time = time.time() + # print(f"-- Validating model for token count: ", token_count) + + # Get the target tokens + target_tokens = test_word_tokens[:token_count] + + # Validate that hte token list match the target token count (throw an error if not) + if len(target_tokens) != token_count: + raise Exception("Target tokens count mismatch - target is probably larger then the eval word list") + + logits = None + state = None + + # We validate with, the instruct and input + # having the option to disable this, helps us have a randomized baseline score + if withoutInstructAndInput == True: + # Because we actuall need a logit to start with, we compromise with a new line at minimum + first_logits, state = model.forward([newline_token], state) + else: + # Clone the state + state = copy.deepcopy(prompt_prefix_state) + + # Compute the document to memorize + logits, state = model.forward(target_tokens, state) + + # Compute the mid segment + first_logits, state = model.forward(mid_segment_tokens, state) + + # Score counter + 
matched_tokens = 0 + + # CSV rows to write + csv_rows = [] + + # Common validation function + # ---- + + async def validateToken(sorted_probs, sorted_indices, softmax_arr, tokenIdx, match_count = 0): + # Get the top token info + top_token = sorted_indices[0].item() + top_prob = sorted_probs[0].item() + + # Check if the token matches, and score it + target = target_tokens[tokenIdx] + if top_token == target: + match_count += 1 + + # Find the target token position + if verbose or csv_writer != None: + target_prob = softmax_arr[target].item() + target_pos = 0 + for i in range(len(sorted_indices)): + if sorted_indices[i].item() == target: + target_pos = i + break + + # Get top_token_str & target_token_str, but because an error can happen, we catch it + try: + top_token_str = model.decode([top_token]).encode('unicode_escape').decode('utf-8') + except: + top_token_str = "" + try: + target_token_str = model.decode([target]).encode('unicode_escape').decode('utf-8') + except: + target_token_str = "" + + # Print the results, for verbose + if verbose: + if top_token == target: + print(f' - token {i} (hit) : "{top_token_str}" ({top_prob*100:.2f}%)') + else: + print(f' - token {i} (miss): "{top_token_str}" ({top_prob*100:.2f}%) | "{target_token_str}" pos={target_pos} ({target_prob*100:.2f}%)') + + # Log it to CSV file if enabled + if csv_writer != None: + # We need to encode the strings safely (escape special characters, new lines, etc) + csv_rows.append([ + token_count, tokenIdx, top_token == target, + top_token_str, top_prob, + target_token_str, target_pos, target_prob, + withoutInstructAndInput == True + ]) + + # Return matched count + return match_count + + # Lets validate the first logits + # ---- + + # Apply token ban + for n in token_ban: + first_logits[n] = -float('inf') + + # Validate the first token (special case) + first_logits = torch.softmax(first_logits, dim=-1) + sorted_probs, sorted_indices = torch.sort(first_logits, descending=True, stable=True, dim=-1) + matched_tokens = await validateToken(sorted_probs, sorted_indices, first_logits, 0) + + # Print the timing till now + # print(f"-- Finished validating first token ({time.time() - start_time:.2f}s)") + + # Loop through the target tokens in set of 1000 + # ---- + for subsetPos in range(0, token_count, 1000): + + # Get the subset, and forward it + token_subset = target_tokens[subsetPos:subsetPos+1000] + subset_logits, state = model.forward(token_subset, state, all_logits=True) + + # Apply the token ban + for n in token_ban: + subset_logits[:,n] = -float('inf') + + # Sort via GPU + subset_logits = subset_logits.to('cuda') + subset_logits = torch.softmax(subset_logits, dim=-1) + sorted_probs, sorted_indices = torch.sort(subset_logits, descending=True, stable=True, dim=-1) + + # Convert back to CPU land + sorted_probs = sorted_probs.to('cpu') + sorted_indices = sorted_indices.to('cpu') + + # Loop through the subset + for i in range(len(token_subset)): + pos = i+1+subsetPos + if pos <= len(target_tokens)-1: + matched_tokens = await validateToken(sorted_probs[i], sorted_indices[i], subset_logits[i], pos, matched_tokens) + + # Garbage collect + gc.collect() + torch.cuda.empty_cache() + + # # Forward all the target tokens in a single pass + # # --- + # all_logits, state = model.forward(target_tokens, state, all_logits=True) + # # print(f"-- Finished multi-token forward pass ({time.time() - start_time:.2f}s)") + + # # Extract the sorted values, and cast them to CPU + # # --- + # # Apply token ban + # for n in token_ban: + # all_logits[:,n] = 
-float('inf') + + # # GPU based sort + # all_logits = all_logits.to('cuda') + # all_logits = torch.softmax(all_logits, dim=-1) + # sorted_probs, sorted_indices = torch.sort(all_logits, descending=True, stable=True, dim=-1) + + # # Convert back to CPU land + # sorted_probs = sorted_probs.to('cpu') + # sorted_indices = sorted_indices.to('cpu') + + # # print(f"-- Finished sorting logits ({time.time() - start_time:.2f}s)") + + # # Lets evaluate the logits, and check if they match one by one + # for i in range(len(target_tokens)-1): + # # Validate the token + # matched_tokens = await validateToken(sorted_probs[i], sorted_indices[i], all_logits[i], i+1, matched_tokens) + + # print(f"-- Finished token matching ({time.time() - start_time:.2f}s)") + + # Write the CSV rows + if csv_writer != None: + await csv_writer.writerows(csv_rows) + + # print(f"-- Finished CSV write ({time.time() - start_time:.2f}s)") + + # Percentage token match + matched_percentage = matched_tokens / token_count * 100.0 + + # Print the results + if withoutInstructAndInput == False: + print(f'## Model validation for {token_count} tokens : {matched_percentage}% similarity, with {matched_tokens} matched token, and {token_count - matched_tokens} token mismatch') + else: + print(f"## Finished baseline model to eval output predictive matching (aka 0 memory?), for {MAX_TOKENS} tokens") + + if verbose: + print("## ------------------ ") + + # # Print more info if there are differences + # if(char_diff_count > 0): + # print("--- target ---") + # print(target_words) + # print("--- completion ---") + # print(completion) + # print("------------------") + + # Print the start of model validation + print("###") + print("### Model validation start ###") + print("###") + + # Check if its an extended eval set + if len(sys.argv) == 4: + EXTENDED_EVAL = True + + # Get the int value from sys.argv[3] + MAX_TOKENS = int(sys.argv[3]) + MIN_TOKENS = 1100 + elif len(sys.argv) == 5: + EXTENDED_EVAL = True + + # Get the int value from sys.argv[3]/[4] + MIN_TOKENS = int(sys.argv[3]) + MAX_TOKENS = int(sys.argv[4]) + else: + EXTENDED_EVAL = False + + # Validate the model at different token counts + if EXTENDED_EVAL == False: + # We validate in increments of 5, from 5 to 150 + for i in range(5, 150, 5): + await validate_model(i) + + # We validate in increments of 10 from 150 to 300 + for i in range(150, 300, 10): + await validate_model(i) + + # We validate in increments of 25 from 300 to 700 + for i in range(300, 700, 25): + await validate_model(i) + + # We validate in increments of 50 from 700 to MAXTOKEN (inclusive) + for i in range(700, MAX_TOKENS+1, 50): + await validate_model(i) + + # Lets do the baseline + if csv_file_path != None: + await validate_model(MAX_TOKENS, withoutInstructAndInput=True) + + else: + # We validate in increments of 100 from 8000 to MAXTOKEN (inclusive) + if MAX_TOKENS > 8000: + for i in range(MIN_TOKENS, MAX_TOKENS+1, 100): + await validate_model(i) + else: + for i in range(MIN_TOKENS, MAX_TOKENS+1, 50): + await validate_model(i) + + # Print the end of model validation + print("###") + print("### Model validation end ###") + print("###") + +if __name__ == '__main__': + asyncio.run(main_function()) \ No newline at end of file diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/run-all-finetune.sh b/notebook/rwkv-x-exp/v5-exp/memory-test/run-all-finetune.sh new file mode 100755 index 00000000..ac26dd1e --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/run-all-finetune.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +papermill \ + -k 
python3 --log-output \ + "./World-7B-mem-finetune.ipynb" "./World-7B-mem-finetune.output.ipynb" + +papermill \ + -k python3 --log-output \ + "./World-3B-mem-finetune.ipynb" "./World-3B-mem-finetune.output.ipynb" + +papermill \ + -k python3 --log-output \ + "./World-1B5-mem-finetune.ipynb" "./World-1B5-mem-finetune.output.ipynb" diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml new file mode 100644 index 00000000..1a02fe57 --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-1-tune.yaml @@ -0,0 +1,411 @@ +# lightning.pytorch==2.0.2 +seed_everything: true +trainer: + # Configure the number of GPU, avaliable on your machine + accelerator: gpu + devices: auto + num_nodes: 1 + + # + # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload` + # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful + # for training LoRA on large models on a single GPU. + # + # In general you would want to use the following: + # + # - deepspeed_stage_1 : Each of your GPU has too much vram, and you do not know what to do + # + # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple gpu each with sufficient vram + # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu + # + # - deepspeed_stage_3 : Split up the model across multiple gpu, useful for large models, at a performance cost + # - deepspeed_stage_3_offload : Additional offloading, for even greater performance cost + # + # For more details see: + # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2 + # + strategy: deepspeed_stage_1 + + # Floating point precision for the model, because RWKV is built FOR bf16 + # you should pretty much never change this setting + precision: bf16 + + # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section + # --- + logger: + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + name: 'Stage-1-memory-finetune-1 (bs=256, train-ctx=512)' + # name: 'Echo-B-1B4 Foundation' + project: 'RWKV-Memory-Experiment' + tags: ['RWKV', 'memory-exp'] + id: null + save_dir: . + version: null + offline: false + dir: null + anonymous: null + log_model: false + experiment: null + prefix: '' + checkpoint_name: null + job_type: null + config: null + entity: null + reinit: null + group: null + notes: null + magic: null + config_exclude_keys: null + config_include_keys: null + mode: null + allow_val_change: null + resume: null + force: null + tensorboard: null + sync_tensorboard: null + monitor_gym: null + save_code: null + settings: null + + # Checkpoint settings for the training process + callbacks: + class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + # Configure this to the path you want to save your checkpoints to + # note that a subdir will be created with the name `epoch=x-step=y.ckpt` + # + # to convert a checkpoint to a model, you can use the + # `python3 export_checkpoint.py ` script, + # which will create a `rwkv_model.pth` in the checkpoint directory. 
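+      # For example (illustrative only; the exact script arguments are an assumption here,
+      # see the RWKV-infctx-trainer README for the authoritative usage):
+      #   python3 export_checkpoint.py ../checkpoint/v5-exp/memory-test/stage-1-memory-finetune/last.ckpt/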
+ # + # Do not use the `zero_to_fp32.py` script as that will have export format issues + dirpath: ../checkpoint/v5-exp/memory-test/stage-1-memory-finetune/ + filename: null + + # Save the top/last K checkpoints + save_top_k: 3 + # Choose by the most recent checkpoints (time based) + monitor: 'step' + mode: max + + # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt' + # useful to simply checkpoint resume scripts, at a price of disk performance + save_last: true + + # DO NOT set this as true, as the model weight exported will have format issues + # expert as checkpoint, and use the `export_checkpoint.py` script to convert to model instead + save_weights_only: false + + # How frequent you want to save a checkpoint for every step. + # This will happen for every X data sample, where X = every_n_train_steps * accumulate_grad_batches + # + # In general you will want to avoid putting a low number (expecially if accumulate_grad_batches <= 100) + # as the checkpoint process, will pause all the gpu training for some time, slowing down the overall process + # However you do not want to configure too high of a number, where you will lose too much progress if the training crashes + every_n_train_steps: 25 + every_n_epochs: null + save_on_train_epoch_end: true + train_time_interval: null + + # Other settings, you can probably leave alone + verbose: false + auto_insert_metric_name: true + + ######################################## + ## Training run parameter settings + ######################################## + + # Generally what you want to configure is the maximum number of epochs + # Leave it as -1, and it will keep going forever till interrupted + # Or set it as a number, and it will stop after that number of epochs + max_epochs: 1 + min_epochs: null + max_steps: -1 + min_steps: null + max_time: null + + # Number of datasamples to train for each step, a data sample is considered + # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step" + # + # This decides the number of datasample, to learn together from, before backproping + # any weight changes at the end of the batch. + # + # Recommended to be a big enough number (like 128/256) where it prevents the training + # loss from flucuating in the process. But not too big of a number where the increased + # GPU vRAM / offloaded RAM usage will cause the training to crash. + # + # You are also recommended to configure this to a large enough number to fully utilize + # your GPU processing time %, and avoid idle time for the GPU between batches + target_batch_size: 256 + + # # Number of datasamples to accumulate before backproping, per GPU + # # this can't be used with target_batch_size. 
+ # accumulate_grad_batches: -1 + + # Various other settings, you probably want to leave alone + fast_dev_run: false + limit_train_batches: null + limit_val_batches: null + limit_test_batches: null + limit_predict_batches: null + overfit_batches: 0.0 + val_check_interval: null + check_val_every_n_epoch: 1 + num_sanity_val_steps: 0 + log_every_n_steps: 1 + enable_checkpointing: null + enable_progress_bar: null + enable_model_summary: null + gradient_clip_val: 1.0 + gradient_clip_algorithm: null + deterministic: null + benchmark: null + inference_mode: true + use_distributed_sampler: true + profiler: null + detect_anomaly: false + barebones: false + plugins: null + sync_batchnorm: false + reload_dataloaders_every_n_epochs: 0 + default_root_dir: null + +######################################## +## Training model settings +######################################## +model: + # Model to start the finetune/training process from + load_model: ../model/Echo-B-1B4-Stage2.pth + + # Context length to use for the training process + # the larger the number (and batch size) the larger the vram usage + # + # Note that if the datasample context length is larger then the ctx_len + # its training process would be split into ctx_len sized chunks. + # + # This allows the training of extreamly large context length (eg. 100k), + # without eating up too much vram by keeping the training context length + # to a resonable number sutible to the current GPU setup + ctx_len: 8192 + + # Learning rate of the training process + # --- + # Initia learning rate of the process + lr_init: 3e-4 + # Final learning rate after the learning rate period + # learning rate will stay at final value from then onwards + # + # NOTE: lr_final / lr_period does not work with warmup_steps + # and will be ignored (or replaced) with the warmup_steps logic instead + lr_final: 2e-4 + + # Number of epoch to reduce the learning rate from lr_init to lr_final + # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards) + # 0 means lr_final will apply immediately + # -1 means we take the current max_step / max_epoch as the period + lr_period: 1 + # lr_period type if its set, defaults to epoch + lr_period_type: epoch + + # Adam optimizer settings + # You probably want to leave this alone, unless you know what you are doing + beta1: 0.9 + beta2: 0.99 + adam_eps: 1.0e-08 + weight_decay: 0.01 + + # Experimental cutoff settings + # --- + # Data samples would be cut down to the respective max ctx_len_cutoffs + # values if its larger then ctx_len. If the data sample is larger then + # the largest len_cutoff, the remaining data will be discarded + # + # Leave it as a blank array to disable the feature + ctx_len_cutoffs: [] + # Experimental settings, number of tokens to skip in the data sample + # prefix, for the respective cutoff length. Used to speed up the process + # + # Leave it as a blank array to disable the feature + ctx_len_warmup_steps: [] + + # torch.set_float32_matmul_precision, used to optimize operations with tensor cores + # this should be set as null, for non cuda core GPUs + torch_set_float32_matmul_precision: 'high' + # torch_set_float32_matmul_precision: null + + # We limit bptt_learning_range, to 1, to ensure high throughput within a multi-gpu setup. + # (by skipping some syncronization code). 
Additionally, as bptt learning should not be triggering + # anyway as the data sample should be within ctx size 99% of the time + bptt_learning: true + bptt_learning_range: 1 + + # various other settings you probably should leave alone + grad_cp: true + warmup_steps: -1 + layerwise_lr: true + dim_att: null + dim_ffn: null + + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Skip tokens that are already learnt, and are below the target threshold + token_loss_threshold: 0.02 + + # Perform token based drop out at random, to the target % rate + # token_dropout_rate: 0.0 + +data: + # Skip the datapath setup + # ignored if using the preload_datapath.py + skip_datapath_setup: True + + # dataset_path for the prebuilt dataset, using HF `load_from_disk()` + # + # Use this if you have built your own dataset and saved it with `save_to_disk()` + # with source left as null. Other wise configure this to a directory which the + # dataset will be built and tokenized by the huggingface dataset process. + data_path: ../datapath/v5-exp/memory-test/stage-1/ + + # Other wise provide the source path, which is used as huggingface dataset path + # this will be used to populate the dataset_path + # + # Use either the following + # - hugging face dataset + # - Directory path to a directory containing dataset files + # - Path to a single dataset file + # - hugging face dataset mode (ie: text,csv,etc - use data_dir, to configure the path then) + # - null + # + # If source is disabled, all other params, except data_path, is ignored + source: json + # source: text + # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt + + # Use data_dir, if you are using source=text/json/etc + # this should be relative to the trainer script path + source_data_dir: ../notebook/rwkv-x-exp/v5-exp/memory-test/dataset + + # After loading the dataset, split out test data used for validation, + # This process is skipped if the dataset includes a test split + # This process is skipped if set to zero + test_split: 0.005 + test_split_shuffle: true + + # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer + # If using a custom tokenizer, provide the tokenizer file path + # --- + tokenizer: world + + # Minimum / Maximum token size of the dataset to use + # useful for filtering out small noisy data samples from large datasets + # (eg. removal of small articles of less then 512 tokens from wikipedia) + # + # This is ignored, if set to -1 + min_token_size: -1 + max_token_size: 4096 + + # Rechunking of text dataset, this is done only when source is set as 'text' + # and will merge the various sentencees, into larger chunks up to the target size + # + # Defaults to 4096 + # + # This is ignored, if source is not set as text + # This is ignored, if set to zero + # --- + # text_rechunk_size: 2048 + + # Apply text rechunk to the dataset, even if its not a 'text' source + # This is done only after dataset filtering, and if source is not 'text' + # --- + # text_rechunk_force: false + + # Custom text column to use, useful for dataset with alternative training columns labels + # This is checked before multi column merging, default is null (disabled) + # eg: 'code' + # --- + # custom_text_key: 'code' + + # Multi Column merging process, default setting is used to support and merge + # "instruction", "input", "output", datasets. 
To disable set multi_column_keys to [] + # + # A minimum of 2 columns is required, with non empty data, for the merge to occur + # If no match is found, this will fallback to the default prompt/completion or text column, + # or throw an error if the default fallback is not found + # --- + # multi_column_keys: ['input_prefix', 'input', 'output_prefix', 'output', 'closing'] + # multi_column_prefix: ['', '', '', '', ''] + # multi_column_train_mask: [true, false, true, true, true] + # multi_column_separator: '' + + # If processing prompt/completion jsonl pairs, the prompt is masked by default + # use this flag to disable this default behaviour + # --- + # disable_prompt_mask: false + + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Prefix token masking + # + # The rationale behind this, is that the first X tokens should not be "backpropped" + # for any new training record. As its unfair to expect the model (or a human) make + # any resonable guesses at that stage. As such this is used to "mask" the first X tokens + # from the loss calculation, and thus not backpropped. + data_prefix_skip_mask: 0 + + # Additional source dataset params, used to grab subsets of the dataset + # --- + # source_dataset_params: + # language: en + + # ---------------------------- + # Dataset packing support + # Recommended to be used with mixed documents sized finetuning + # For foundation model "from scratch", rechunking is typically used instead + # ---------------------------- + + # Boolean flag to enable / disable dataset packing + packing_enable: True + + # Used to ensure all training samples wihin this batch size is the same length + # Ideally this should align exactly with your real "batch size" + # + # Uses, `8 * (3 * 4 * 5 * 6 * 7) = 20160` for default, as it should align across + # a large number of batch size combinations. This helps reduce the amount of + # misaligned batches, and thus reduce the amount of wasted training time. + packing_batchsize: 256 + + # Chunking size to align within each batch, this ideally should be equal to + # the training context length used. 
+ packing_chunksize: 2048 + + # Minimum size to pack up to, this should be a multiple of packing_chunksize + # defautls to -1, which equals to packing_chunksize + packing_min_ctx_len: 8192 + + # Pack the data sequentially if possible, in accordance to the dataset sequence + # this can be used together with sort_by_length + packing_in_sequence: true + + # ---------------------------- + # Sort before packing, and reverse before saving + # ---------------------------- + + # Sort the dataset by length, useful to reduce gpu waiting time (also useful for RWKV long context coherence) + sort_by_length: true + sort_asc: false # Sort in ascending order, true = shortest first, false = longest first + + # Reverse the training dataset order before saving, this is useful for, + # optimizing dataset packing process, when using packing_in_sequence + # and sort_by_length desc order together + reverse_train_dataset_before_save: true + +# Path to the current checkpoint to continue training from +# Enable this to the last checkpoint after the first run +# (if it crash and you want to resume) +# ckpt_path: ../checkpoint/xyz/last.ckpt +ckpt_path: null diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml new file mode 100644 index 00000000..e6b8fcf6 --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml @@ -0,0 +1,411 @@ +# lightning.pytorch==2.0.2 +seed_everything: true +trainer: + # Configure the number of GPU, avaliable on your machine + accelerator: gpu + devices: auto + num_nodes: 1 + + # + # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload` + # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful + # for training LoRA on large models on a single GPU. + # + # In general you would want to use the following: + # + # - deepspeed_stage_1 : Each of your GPU has too much vram, and you do not know what to do + # + # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple gpu each with sufficient vram + # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu + # + # - deepspeed_stage_3 : Split up the model across multiple gpu, useful for large models, at a performance cost + # - deepspeed_stage_3_offload : Additional offloading, for even greater performance cost + # + # For more details see: + # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2 + # + strategy: deepspeed_stage_1 + + # Floating point precision for the model, because RWKV is built FOR bf16 + # you should pretty much never change this setting + precision: bf16 + + # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section + # --- + logger: + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + name: 'stage-2-memory-finetune-1 (bs=256, train-ctx=512)' + # name: 'Echo-B-1B4 Foundation' + project: 'RWKV-Memory-Experiment' + tags: ['RWKV', 'memory-exp'] + id: null + save_dir: . 
+ + # Floating point precision for the model, because RWKV is built FOR bf16 + # you should pretty much never change this setting + precision: bf16 + + # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section + # --- + logger: + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + name: 'stage-2-memory-finetune-1 (bs=256, train-ctx=512)' + # name: 'Echo-B-1B4 Foundation' + project: 'RWKV-Memory-Experiment' + tags: ['RWKV', 'memory-exp'] + id: null + save_dir: . + version: null + offline: false + dir: null + anonymous: null + log_model: false + experiment: null + prefix: '' + checkpoint_name: null + job_type: null + config: null + entity: null + reinit: null + group: null + notes: null + magic: null + config_exclude_keys: null + config_include_keys: null + mode: null + allow_val_change: null + resume: null + force: null + tensorboard: null + sync_tensorboard: null + monitor_gym: null + save_code: null + settings: null + + # Checkpoint settings for the training process + callbacks: + class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + # Configure this to the path you want to save your checkpoints to + # note that a subdir will be created with the name `epoch=x-step=y.ckpt` + # + # to convert a checkpoint to a model, you can use the + # `python3 export_checkpoint.py ` script, + # which will create a `rwkv_model.pth` in the checkpoint directory. + # + # Do not use the `zero_to_fp32.py` script as that will have export format issues + dirpath: ../checkpoint/v5-exp/memory-test/stage-2-memory-finetune/ + filename: null + + # Save the top/last K checkpoints + save_top_k: 3 + # Choose by the most recent checkpoints (time based) + monitor: 'step' + mode: max + + # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt' + # useful to simplify checkpoint resume scripts, at a price of disk performance + save_last: true + + # DO NOT set this as true, as the model weight exported will have format issues + # export as a checkpoint, and use the `export_checkpoint.py` script to convert it to a model instead + save_weights_only: false + + # How frequently you want to save a checkpoint, in steps. + # This will happen for every X data sample, where X = every_n_train_steps * accumulate_grad_batches + # + # In general you will want to avoid putting a low number (especially if accumulate_grad_batches <= 100) + # as the checkpoint process will pause all the gpu training for some time, slowing down the overall process + # However you do not want to configure too high a number, where you will lose too much progress if the training crashes + every_n_train_steps: 25 + every_n_epochs: null + save_on_train_epoch_end: true + train_time_interval: null + + # Other settings, you can probably leave alone + verbose: false + auto_insert_metric_name: true + + ######################################## + ## Training run parameter settings + ######################################## + + # Generally what you want to configure is the maximum number of epochs + # Leave it as -1, and it will keep going forever till interrupted + # Or set it as a number, and it will stop after that number of epochs + max_epochs: 1 + min_epochs: null + max_steps: -1 + min_steps: null + max_time: null + + # Number of datasamples to train for each step, a data sample is considered + # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step" + # + # This decides the number of datasamples to learn together from, before backpropping + # any weight changes at the end of the batch. + # + # Recommended to be a big enough number (like 128/256) so that it prevents the training + # loss from fluctuating in the process, but not so big a number that the increased + # GPU vRAM / offloaded RAM usage will cause the training to crash. + # + # You are also recommended to configure this to a large enough number to fully utilize + # your GPU processing time %, and avoid idle time for the GPU between batches + target_batch_size: 256
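+ # For example (worked numbers, taken from the trainer logs of the notebooks later in this diff): + # with 1 node, 8 gpus and microbatch_size: 8, a target_batch_size of 128 resolves to + # accumulate_grad_batches = 128 / (1 * 8 * 8) = 2 for an effective batch size of 128; the + # target_batch_size of 256 used here would likewise resolve to 4 on the same hardware.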
+ + # # Number of datasamples to accumulate before backpropping, per GPU + # # this can't be used with target_batch_size. + # accumulate_grad_batches: -1 + + # Various other settings, you probably want to leave alone + fast_dev_run: false + limit_train_batches: null + limit_val_batches: null + limit_test_batches: null + limit_predict_batches: null + overfit_batches: 0.0 + val_check_interval: null + check_val_every_n_epoch: 1 + num_sanity_val_steps: 0 + log_every_n_steps: 1 + enable_checkpointing: null + enable_progress_bar: null + enable_model_summary: null + gradient_clip_val: 1.0 + gradient_clip_algorithm: null + deterministic: null + benchmark: null + inference_mode: true + use_distributed_sampler: true + profiler: null + detect_anomaly: false + barebones: false + plugins: null + sync_batchnorm: false + reload_dataloaders_every_n_epochs: 0 + default_root_dir: null + +######################################## +## Training model settings +######################################## +model: + # Model to start the finetune/training process from + load_model: ../model/Echo-B-1B4-Stage2.pth + + # Context length to use for the training process + # the larger the number (and batch size) the larger the vram usage + # + # Note that if the datasample context length is larger than the ctx_len, + # its training process would be split into ctx_len sized chunks. + # + # This allows the training of extremely large context lengths (eg. 100k), + # without eating up too much vram, by keeping the training context length + # to a reasonable number suitable for the current GPU setup + ctx_len: 8192 + + # Learning rate of the training process + # --- + # Initial learning rate of the process + lr_init: 2e-4 + # Final learning rate after the learning rate period + # learning rate will stay at the final value from then onwards + # + # NOTE: lr_final / lr_period does not work with warmup_steps + # and will be ignored (or replaced) with the warmup_steps logic instead + lr_final: 1e-4 + + # Number of epochs to reduce the learning rate from lr_init to lr_final + # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards) + # 0 means lr_final will apply immediately + # -1 means we take the current max_step / max_epoch as the period + lr_period: 1 + # lr_period type if it is set, defaults to epoch + lr_period_type: epoch
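+ # For example, with the values above the learning rate decays from 2e-4 down to 1e-4 over + # the first epoch, then stays at 1e-4 for the remainder of the run (assuming warmup_steps + # stays disabled, per the lr_final / lr_period note above).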
+ + # Adam optimizer settings + # You probably want to leave this alone, unless you know what you are doing + beta1: 0.9 + beta2: 0.99 + adam_eps: 1.0e-08 + weight_decay: 0.01 + + # Experimental cutoff settings + # --- + # Data samples would be cut down to the respective max ctx_len_cutoffs + # values if they are larger than ctx_len. If the data sample is larger than + # the largest len_cutoff, the remaining data will be discarded + # + # Leave it as a blank array to disable the feature + ctx_len_cutoffs: [] + # Experimental settings, number of tokens to skip in the data sample + # prefix, for the respective cutoff length. Used to speed up the process + # + # Leave it as a blank array to disable the feature + ctx_len_warmup_steps: [] + + # torch.set_float32_matmul_precision, used to optimize operations with tensor cores + # this should be set to null for non-cuda-core GPUs + torch_set_float32_matmul_precision: 'high' + # torch_set_float32_matmul_precision: null + + # We limit bptt_learning_range to 1, to ensure high throughput within a multi-gpu setup + # (by skipping some synchronization code). Additionally, bptt learning should not be triggered + # anyway, as the data sample should be within ctx size 99% of the time + bptt_learning: true + bptt_learning_range: 1 + + # various other settings you probably should leave alone + grad_cp: true + warmup_steps: -1 + layerwise_lr: true + dim_att: null + dim_ffn: null + + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Skip tokens that are already learnt, and are below the target threshold + token_loss_threshold: 0.02 + + # Perform token based dropout at random, to the target % rate + # token_dropout_rate: 0.0 + +data: + # Skip the datapath setup + # ignored if using the preload_datapath.py + skip_datapath_setup: True + + # dataset_path for the prebuilt dataset, using HF `load_from_disk()` + # + # Use this if you have built your own dataset and saved it with `save_to_disk()` + # with source left as null. Otherwise configure this to a directory where the + # dataset will be built and tokenized by the huggingface dataset process. + data_path: ../datapath/v5-exp/memory-test/stage-2/ + + # Otherwise provide the source path, which is used as the huggingface dataset path + # this will be used to populate the dataset_path + # + # Use either the following + # - hugging face dataset + # - Directory path to a directory containing dataset files + # - Path to a single dataset file + # - hugging face dataset mode (ie: text, csv, etc - use data_dir to configure the path then) + # - null + # + # If source is disabled, all other params, except data_path, are ignored + source: json + # source: text + # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt + + # Use data_dir, if you are using source=text/json/etc + # this should be relative to the trainer script path + source_data_dir: ../notebook/rwkv-x-exp/v5-exp/memory-test/dataset + + # After loading the dataset, split out test data used for validation, + # This process is skipped if the dataset includes a test split + # This process is skipped if set to zero + test_split: 0.005 + test_split_shuffle: true + + # Tokenizer to use, use either the inbuilt 'neox' or 'world' tokenizer + # If using a custom tokenizer, provide the tokenizer file path + # --- + tokenizer: world + + # Minimum / Maximum token size of the dataset to use + # useful for filtering out small noisy data samples from large datasets + # (eg. removal of small articles of less than 512 tokens from wikipedia) + # + # This is ignored, if set to -1 + min_token_size: -1 + max_token_size: 8192
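+ # For example (illustrative value): setting `min_token_size: 512` instead would drop the + # short noisy records described above, while the max_token_size of 8192 used here presumably + # mirrors this config's training ctx_len of 8192.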
+ + # Rechunking of the text dataset; this is done only when source is set as 'text', + # and will merge the various sentences into larger chunks up to the target size + # + # Defaults to 4096 + # + # This is ignored, if source is not set as text + # This is ignored, if set to zero + # --- + # text_rechunk_size: 2048 + + # Apply text rechunk to the dataset, even if it is not a 'text' source + # This is done only after dataset filtering, and if source is not 'text' + # --- + # text_rechunk_force: false + + # Custom text column to use, useful for datasets with alternative training column labels + # This is checked before multi column merging, default is null (disabled) + # eg: 'code' + # --- + # custom_text_key: 'code' + + # Multi Column merging process, the default setting is used to support and merge + # "instruction", "input", "output" datasets. To disable, set multi_column_keys to [] + # + # A minimum of 2 columns is required, with non-empty data, for the merge to occur + # If no match is found, this will fall back to the default prompt/completion or text column, + # or throw an error if the default fallback is not found + # --- + # multi_column_keys: ['input_prefix', 'input', 'output_prefix', 'output', 'closing'] + # multi_column_prefix: ['', '', '', '', ''] + # multi_column_train_mask: [true, false, true, true, true] + # multi_column_separator: '' + + # If processing prompt/completion jsonl pairs, the prompt is masked by default + # use this flag to disable this default behaviour + # --- + # disable_prompt_mask: false + + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Prefix token masking + # + # The rationale behind this is that the first X tokens should not be "backpropped" + # for any new training record, as it's unfair to expect the model (or a human) to make + # any reasonable guesses at that stage. As such, this is used to "mask" the first X tokens + # from the loss calculation, so they are not backpropped. + data_prefix_skip_mask: 0 + + # Additional source dataset params, used to grab subsets of the dataset + # --- + # source_dataset_params: + # language: en + + # ---------------------------- + # Dataset packing support + # Recommended to be used with mixed document size finetuning + # For foundation model "from scratch", rechunking is typically used instead + # ---------------------------- + + # Boolean flag to enable / disable dataset packing + packing_enable: True + + # Used to ensure all training samples within this batch size are the same length + # Ideally this should align exactly with your real "batch size" + # + # Uses `8 * (3 * 4 * 5 * 6 * 7) = 20160` by default, as it should align across + # a large number of batch size combinations. This helps reduce the amount of + # misaligned batches, and thus reduce the amount of wasted training time. + packing_batchsize: 256 + + # Chunking size to align within each batch; this ideally should be equal to + # the training context length used.
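+ # For example, following the guidance above, this config (which trains with ctx_len: 8192 in + # its model section) could set `packing_chunksize: 8192` for an exact match; the value below + # keeps the smaller 2048 alignment instead.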
+ packing_chunksize: 2048 + + # Minimum size to pack up to, this should be a multiple of packing_chunksize + # defautls to -1, which equals to packing_chunksize + packing_min_ctx_len: 8192 + + # Pack the data sequentially if possible, in accordance to the dataset sequence + # this can be used together with sort_by_length + packing_in_sequence: true + + # ---------------------------- + # Sort before packing, and reverse before saving + # ---------------------------- + + # Sort the dataset by length, useful to reduce gpu waiting time (also useful for RWKV long context coherence) + sort_by_length: true + sort_asc: false # Sort in ascending order, true = shortest first, false = longest first + + # Reverse the training dataset order before saving, this is useful for, + # optimizing dataset packing process, when using packing_in_sequence + # and sort_by_length desc order together + reverse_train_dataset_before_save: true + +# Path to the current checkpoint to continue training from +# Enable this to the last checkpoint after the first run +# (if it crash and you want to resume) +# ckpt_path: ../checkpoint/xyz/last.ckpt +ckpt_path: null diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-100k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-100k.ipynb new file mode 100644 index 00000000..c2714362 --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/1B5-baseline-run-100k.ipynb @@ -0,0 +1,24504 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1B5 Baseline Run\n", + "\n", + "Without any experimental tweaks" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "WANDB_PREFIX=\"infctx-v5-selective-loss\"\n", + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# The model sizing\n", + "MODEL_NAME=\"RWKV-v5-1B5-world.bin\"\n", + "MODEL_URL=\"https://huggingface.co/BlinkDL/rwkv-5-world/resolve/main/RWKV-5-World-1B5-v2-20231025-ctx4096.pth?download=true\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File ‘RWKV-v5-1B5-world.bin’ already there; not retrieving.\n" + ] + } + ], + "source": [ + "# Lets wget the model files\n", + "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n", + "!cd \"{PROJECT_DIR}/model\" && \\\n", + " wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving the dataset (1/1 shards): 100%|█| 613/613 [00:00<00:00, 11158.16 examples\n", + "Saving the dataset (1/1 shards): 100%|█| 803/803 [00:00<00:00, 28408.98 examples\n" + ] + } + ], + "source": [ + "# Lets preload the requried dataset \n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/config/1B5-enwiki_100k-world-packing.yaml\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multi-epoch training" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-21 07:59:19,967] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=2048', '--trainer.devices=auto'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml', '--model.load_model=../model/RWKV-v5-1B5-world.bin', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--model.ctx_len=2048', '--trainer.devices=auto'].\n", + "Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 128\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 2\n", + " - effective_batch_size: 128\n", + "\n", + "[rank: 0] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-21 07:59:41,746] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,749] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,756] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,757] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,758] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,759] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-21 07:59:41,759] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 1] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 6] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 3] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 5] Seed set to 3941088705\n", + "[rank: 2] Seed set to 3941088705\n", + "[rank: 4] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 7] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 1] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 2] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 3] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 6] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 4] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 5] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 7] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240121_080019-vs7gwnia\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33minfctx-v5-selective-loss - 1B5 - Baseline (packsize=16k, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss/runs/vs7gwnia\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 1.000e-04 (0.0001)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty 
with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.08217287063598633 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10234260559082031 seconds\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10240006446838379 seconds\n", + "Time to load fused_adam op: 0.10246396064758301 seconds\n", + "Time to load fused_adam op: 0.10230851173400879 seconds\n", + "Time to load fused_adam op: 0.10246992111206055 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10248088836669922 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10278725624084473 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (10) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 10/10 [02:30<00:00, 0.07it/s, v_num=wnia, train/loss=2.220]\n", + "Validation: | | 0/? 
[00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 1.000e-04 (0.0001)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.08217287063598633 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10234260559082031 seconds\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10240006446838379 seconds\n", + "Time to load fused_adam op: 0.10246396064758301 seconds\n", + "Time to load fused_adam op: 0.10230851173400879 seconds\n", + "Time to load fused_adam op: 0.10246992111206055 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10248088836669922 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10278725624084473 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (10) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 10/10 [02:30<00:00, 0.07it/s, v_num=wnia, train/loss=2.220]\n", + "Validation: | | 0/? [00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 1.000e-04 (0.0001)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.08153295516967773 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10219526290893555 seconds\n", + "Time to load fused_adam op: 0.10224127769470215 seconds\n", + "Time to load fused_adam op: 0.10227823257446289 seconds\n", + "Time to load fused_adam op: 0.10226273536682129 seconds\n", + "Time to load fused_adam op: 0.10217475891113281 seconds\n", + "Time to load fused_adam op: 0.10245966911315918 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10364770889282227 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (10) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 10/10 [02:52<00:00, 0.06it/s, v_num=xvdu, train/loss=2.050]\n", + "Validation: | | 0/? [00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 1.000e-04 (0.0001)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.08153295516967773 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10219526290893555 seconds\n", + "Time to load fused_adam op: 0.10224127769470215 seconds\n", + "Time to load fused_adam op: 0.10227823257446289 seconds\n", + "Time to load fused_adam op: 0.10226273536682129 seconds\n", + "Time to load fused_adam op: 0.10217475891113281 seconds\n", + "Time to load fused_adam op: 0.10245966911315918 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10364770889282227 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 134 M \n", + "1 | blocks | ModuleList | 1.3 B \n", + "2 | ln_out | LayerNorm | 4.1 K \n", + "3 | head | Linear | 134 M \n", + "--------------------------------------\n", + "1.6 B Trainable params\n", + "0 Non-trainable params\n", + "1.6 B Total params\n", + "6,311.018 Total estimated model params size (MB)\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (10) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Epoch 0: 100%|████| 10/10 [02:52<00:00, 0.06it/s, v_num=xvdu, train/loss=2.050]\n", + "Validation: | | 0/? [00:00] 14.00G 13.5MB/s in 16m 12s \n", + "\n", + "2024-01-20 06:05:14 (14.8 MB/s) - ‘RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth?download=true’ saved [15036197229/15036197229]\n", + "\n" + ] + } + ], + "source": [ + "# Lets wget the model files\n", + "!mkdir -p \"{PROJECT_DIR}/model\"\n", + "!cd \"{PROJECT_DIR}/model\" && \\\n", + " wget -O \"{MODEL_NAME}\" -nc \"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Map (num_proc=160): 100%|███| 1000000/1000000 [00:10<00:00, 95906.26 examples/s]\n", + "Filter (num_proc=160): 100%|█| 1000000/1000000 [00:05<00:00, 179927.28 examples/\n", + "Map (num_proc=160): 100%|█████| 120800/120800 [00:03<00:00, 37916.96 examples/s]\n", + "Map (num_proc=160): 100%|█████| 120800/120800 [00:05<00:00, 21204.90 examples/s]\n", + "Saving the dataset (4/4 shards): 100%|█| 18147/18147 [00:04<00:00, 4124.48 examp\n", + "Saving the dataset (1/1 shards): 100%|█| 13423/13423 [00:00<00:00, 25885.58 exam\n" + ] + } + ], + "source": [ + "# Lets preload the requried dataset \n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-packing.yaml\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multi-epoch training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-packing.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --data.skip_datapath_setup=True \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/selective-loss/baseline/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - 7B - Baseline (packsize=16k, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.target_batch_size=512 \\\n", + " --trainer.microbatch_size=10 \\\n", + " --model.ctx_len=4096 \\\n", + " --trainer.devices=\"{GPU_DEVICES}\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "rwkv-infctx", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_100k-world-packing.yaml b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_100k-world-packing.yaml new file mode 100644 index 00000000..5f8bd1ab --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_100k-world-packing.yaml @@ -0,0 +1,298 @@ +# lightning.pytorch==2.0.2 +seed_everything: 3941088705 +trainer: + + # + # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload` + # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful + # for training LoRA on large models on a single GPU. + # + # In general you would want to use the following: + # + # - deepspeed_stage_1 : Each of your GPU has too much vram, and you do not know what to do + # + # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple gpu each with sufficient vram + # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu + # + # - deepspeed_stage_3 : Split up the model across multiple gpu, useful for large models, at a performance cost + # - deepspeed_stage_3_offload : Additional offloading, for even greater performance cost + # + # For more details see: + # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2 + # + strategy: deepspeed_stage_2 + + # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section + # --- + logger: + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + name: 'infctx-v5-unit-test-baseline (train-ctx=1024, data-ctx=16k-packing)' + project: 'RWKV-X-SLoss' + tags: ['RWKV', 'infctx'] + + # Checkpoint settings for the training process + callbacks: + class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + # Configure this to the path you want to save your checkpoints to + # note that a subdir will be created with the name `epoch=x-step=y.ckpt` + # + # to convert a checkpoint to a model, you can use the + # `python3 export_checkpoint.py ` script, + # which will create a `rwkv_model.pth` in the checkpoint directory. + # + # Do not use the `zero_to_fp32.py` script as that will have export format issues + dirpath: ../checkpoint/selective-loss/baseline + filename: null + + # Save the top/last K checkpoints + save_top_k: 1 + # Choose by the most recent checkpoints (step based) + monitor: 'step' + mode: max + + # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt' + # useful to simply checkpoint resume scripts, at a price of disk performance + save_last: false + + # DO NOT set this as true, as the model weight exported will have format issues + # expert as checkpoint, and use the `export_checkpoint.py` script to convert to model instead + save_weights_only: false + + # How frequent you want to save a checkpoint for every step. 
+ # This will happen for every X data sample, where X = every_n_train_steps * accumulate_grad_batches + # + # In general you will want to avoid putting a low number (expecially if accumulate_grad_batches <= 100) + # as the checkpoint process, will pause all the gpu training for some time, slowing down the overall process + # However you do not want to configure too high of a number, where you will lose too much progress if the training crashes + every_n_train_steps: null + every_n_epochs: 1 + save_on_train_epoch_end: true + train_time_interval: null + + # Other settings, you can probably leave alone + verbose: false + auto_insert_metric_name: true + + ######################################## + ## Training run parameter settings + ######################################## + + # Generally what you want to configure is the maximum number of epochs + # Leave it as -1, and it will keep going forever till interrupted + # Or set it as a number, and it will stop after that number of epochs + max_epochs: -1 + min_epochs: null + max_steps: -1 + min_steps: null + max_time: null + + # Number of datasamples to train for each step, a data sample is considered + # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step" + # + # This decides the number of datasample, to learn together from, before backproping + # any weight changes at the end of the batch. + # + # Recommended to be a big enough number (like 128/256) where it prevents the training + # loss from flucuating in the process. But not too big of a number where the increased + # GPU vRAM / offloaded RAM usage will cause the training to crash. + # + # You are also recommended to configure this to a large enough number to fully utilize + # your GPU processing time %, and avoid idle time for the GPU between batches + # + # This number is divided by the number of GPUs, and nodes configured + # So if you have 4 GPUs, and 2 nodes, and this is configured as 128 + # Each GPU will process 128/4/2 = 16 datasamples per step, via accumulate_grad_batches + target_batch_size: 16 + +######################################## +## Training model settings +######################################## +model: + # Model to start the finetune/training process from + load_model: ../model/L24-D2048-world-v5base-init.pth + + # Context length to use for the training process + # the larger the number (and batch size) the larger the vram usage + # + # Note that if the datasample context length is larger then the ctx_len + # its training process would be split into ctx_len sized chunks. + # + # This allows the training of extreamly large context length (eg. 100k), + # without eating up too much vram by keeping the training context length + # to a resonable number sutible to the current GPU setup + ctx_len: 2048 + + # Data samples would be cut down to the respective max ctx_len_cutoffs + # values if its larger then ctx_len. If the data sample is larger then + # the largest len_cutoff, the remaining data will be discarded + ctx_len_cutoffs: [] + # Experimental settings, number of tokens to skip in the data sample + # prefix, for the respective cutoff length. 
Used to speed up the process + ctx_len_warmup_steps: [] + + # Learning rate of the training process + # --- + + # Initia learning rate of the process + lr_init: 1e-4 + # Final learning rate after the learning rate period + # learning rate will stay at final value from then onwards + lr_final: 1e-4 + + # Number of epoch to reduce the learning rate from lr_init to lr_final + # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards) + # 0 means lr_final will apply immediately + # -1 means we take the current max_step / max_epoch as the period + lr_period: 1 + # lr_period type if its set, defaults to epoch + lr_period_type: epoch + + # Adam optimizer settings + # You probably want to leave this alone, unless you know what you are doing + beta1: 0.9 + beta2: 0.99 + adam_eps: 1.0e-08 + weight_decay: 0.01 + + # torch.set_float32_matmul_precision, used to optimize operations with tensor cores + # this should be set as null, for non cuda core GPUs + torch_set_float32_matmul_precision: 'high' + # torch_set_float32_matmul_precision: null + + # Segmented based learning, used to work around training of large context length + # beyond what can be supported by the current GPU vram architecture + # + # This is not 1:1 equivalent to the same training process with required vram + # as the training process is split into multiple segments, part by part. + # with limited learnings from the previous segment. + bptt_learning: true + + # Segmented range to performing backprop learning on + # 1 means to apply only for the last segment + # -1 means to apply for all segments + bptt_learning_range: -1 + +data: + # Skip the datapath setup + # ignored if using the preload_datapath.py + skip_datapath_setup: True + + # dataset_path for the prebuilt dataset, using HF `load_from_disk()` + # + # Use this if you have built your own dataset and saved it with `save_to_disk()` + # with source left as null. Other wise configure this to a directory which the + # dataset will be built and tokenized by the huggingface dataset process. + data_path: ../datapath/enwiki_100k-world-packing/ + + # Other wise provide the source path, which is used as huggingface dataset path + # this will be used to populate the dataset_path + # + # Use either the following + # - hugging face dataset + # - Directory path to a directory containing dataset files + # - Path to a single dataset file + # - hugging face dataset mode (ie: text,csv,etc - use data_dir, to configure the path then) + # - null + # + # If source is disabled, all other params, except data_path, is ignored + source: "teven/enwiki_100k" + # source: text + # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt + + # Use data_dir, if you are using source=text/json/etc + # this should be relative to the trainer script path + source_data_dir: null + + # After loading the dataset, split out test data used for unit-test, + # This process is skipped if the dataset includes a test split + # This process is skipped if set to zero + test_split: 0.05 + test_split_shuffle: true + + # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer + # If using a custom tokenizer, provide the tokenizer file path + # --- + tokenizer: world + + # Minimum / Maximum token size of the dataset to use + # useful for filtering out small noisy data samples from large datasets + # (eg. 
removal of small articles of less then 512 tokens from wikipedia) + # + # This is ignored, if set to -1 + min_token_size: 1024 + max_token_size: 8192 + + # Rechunking of text dataset, this is done only when source is set as 'text' + # and will merge the various sentencees, into larger chunks up to the target size + # + # Defaults to 4096 + # + # This is ignored, if source is not set as text + # This is ignored, if set to zero + # --- + # text_rechunk_size: 4096 + + # Apply text rechunk to the dataset, even if its not a 'text' source + # This is done only after dataset filtering, and if source is not 'text' + # --- + # text_rechunk_force: true + + # Custom text column to use, useful for dataset with alternative training columns labels + # This is checked before multi column merging, default is null (disabled) + # eg: 'code' + # --- + # custom_text_key: 'code' + + # Multi Column merging process, default setting is used to support and merge + # "instruction", "input", "output", datasets. To disable set multi_column_keys to [] + # + # A minimum of 2 columns is required, with non empty data, for the merge to occur + # If no match is found, this will fallback to the default prompt/completion or text column, + # or throw an error if the default fallback is not found + # --- + # multi_column_keys: ['instruction', 'input', 'output'] + # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n'] + # multi_column_train_mask: [true, false, true] + # multi_column_separator: '\n\n' + + # If processing prompt/completion jsonl pairs, the prompt is masked by default + # use this flag to disable this default behaviour + # --- + # disable_prompt_completion_mask: false + + # ---------------------------- + # Dataset packing support + # Recommended to be used with mixed documents sized finetuning + # For foundation model "from scratch", rechunking is typically used instead + # ---------------------------- + + # Boolean flag to enable / disable dataset packing + packing_enable: True + + # Used to ensure all training samples wihin this batch size is the same length + # Ideally this should align exactly with your real "batch size" + # + # Uses, `8 * (3 * 4 * 5 * 6 * 7) = 20160` for default, as it should align across + # a large number of batch size combinations. This helps reduce the amount of + # misaligned batches, and thus reduce the amount of wasted training time. + packing_batchsize: 128 + + # Chunking size to align within each batch, this ideally should be equal to + # the training context length used. 
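+ #
+ # Illustrative relationship between the values used in this config (a sketch
+ # of how they line up, the exact padding behaviour is decided by the packing
+ # implementation in src/data.py):
+ #
+ #   model.ctx_len        : 2048   # training context length
+ #   packing_chunksize    : 2048   # ideally equal to ctx_len
+ #   packing_min_ctx_len  : 8192   # a multiple of the chunksize (4 x 2048)
+ #
+ # With these values, a packed sample of 8192 tokens is trained as 4 segments
+ # of ctx_len (2048) each, with BPTT applied across all segments since
+ # bptt_learning_range is -1 in this config.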
+ packing_chunksize: 2048 + + # Minimum size to pack up to, this should be a multiple of packing_chunksize + # defautls to -1, which equals to packing_chunksize + packing_min_ctx_len: 8192 + + # Pack the data sequentially if possible, in accordance to the dataset sequence + # this can be used together with sort_by_length + packing_in_sequence: False + +# Path to the current checkpoint to continue training from +# Enable this to the last checkpoint after the first run +# (if it crash and you want to resume) +# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt +ckpt_path: null diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml new file mode 100644 index 00000000..4ec5aff0 --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-enwiki_10k-world-packing.yaml @@ -0,0 +1,298 @@ +# lightning.pytorch==2.0.2 +seed_everything: 3941088705 +trainer: + + # + # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload` + # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful + # for training LoRA on large models on a single GPU. + # + # In general you would want to use the following: + # + # - deepspeed_stage_1 : Each of your GPU has too much vram, and you do not know what to do + # + # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple gpu each with sufficient vram + # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu + # + # - deepspeed_stage_3 : Split up the model across multiple gpu, useful for large models, at a performance cost + # - deepspeed_stage_3_offload : Additional offloading, for even greater performance cost + # + # For more details see: + # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2 + # + strategy: deepspeed_stage_2 + + # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section + # --- + logger: + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + name: 'infctx-v5-unit-test-baseline (train-ctx=1024, data-ctx=16k-packing)' + project: 'RWKV-X-SLoss' + tags: ['RWKV', 'infctx'] + + # Checkpoint settings for the training process + callbacks: + class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + # Configure this to the path you want to save your checkpoints to + # note that a subdir will be created with the name `epoch=x-step=y.ckpt` + # + # to convert a checkpoint to a model, you can use the + # `python3 export_checkpoint.py ` script, + # which will create a `rwkv_model.pth` in the checkpoint directory. + # + # Do not use the `zero_to_fp32.py` script as that will have export format issues + dirpath: ../checkpoint/selective-loss/baseline + filename: null + + # Save the top/last K checkpoints + save_top_k: 1 + # Choose by the most recent checkpoints (step based) + monitor: 'step' + mode: max + + # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt' + # useful to simply checkpoint resume scripts, at a price of disk performance + save_last: false + + # DO NOT set this as true, as the model weight exported will have format issues + # expert as checkpoint, and use the `export_checkpoint.py` script to convert to model instead + save_weights_only: false + + # How frequent you want to save a checkpoint for every step. 
+ # This will happen for every X data sample, where X = every_n_train_steps * accumulate_grad_batches + # + # In general you will want to avoid putting a low number (expecially if accumulate_grad_batches <= 100) + # as the checkpoint process, will pause all the gpu training for some time, slowing down the overall process + # However you do not want to configure too high of a number, where you will lose too much progress if the training crashes + every_n_train_steps: null + every_n_epochs: 1 + save_on_train_epoch_end: true + train_time_interval: null + + # Other settings, you can probably leave alone + verbose: false + auto_insert_metric_name: true + + ######################################## + ## Training run parameter settings + ######################################## + + # Generally what you want to configure is the maximum number of epochs + # Leave it as -1, and it will keep going forever till interrupted + # Or set it as a number, and it will stop after that number of epochs + max_epochs: -1 + min_epochs: null + max_steps: -1 + min_steps: null + max_time: null + + # Number of datasamples to train for each step, a data sample is considered + # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step" + # + # This decides the number of datasample, to learn together from, before backproping + # any weight changes at the end of the batch. + # + # Recommended to be a big enough number (like 128/256) where it prevents the training + # loss from flucuating in the process. But not too big of a number where the increased + # GPU vRAM / offloaded RAM usage will cause the training to crash. + # + # You are also recommended to configure this to a large enough number to fully utilize + # your GPU processing time %, and avoid idle time for the GPU between batches + # + # This number is divided by the number of GPUs, and nodes configured + # So if you have 4 GPUs, and 2 nodes, and this is configured as 128 + # Each GPU will process 128/4/2 = 16 datasamples per step, via accumulate_grad_batches + target_batch_size: 16 + +######################################## +## Training model settings +######################################## +model: + # Model to start the finetune/training process from + load_model: ../model/L24-D2048-world-v5base-init.pth + + # Context length to use for the training process + # the larger the number (and batch size) the larger the vram usage + # + # Note that if the datasample context length is larger then the ctx_len + # its training process would be split into ctx_len sized chunks. + # + # This allows the training of extreamly large context length (eg. 100k), + # without eating up too much vram by keeping the training context length + # to a resonable number sutible to the current GPU setup + ctx_len: 2048 + + # Data samples would be cut down to the respective max ctx_len_cutoffs + # values if its larger then ctx_len. If the data sample is larger then + # the largest len_cutoff, the remaining data will be discarded + ctx_len_cutoffs: [] + # Experimental settings, number of tokens to skip in the data sample + # prefix, for the respective cutoff length. 
Used to speed up the process + ctx_len_warmup_steps: [] + + # Learning rate of the training process + # --- + + # Initia learning rate of the process + lr_init: 1e-4 + # Final learning rate after the learning rate period + # learning rate will stay at final value from then onwards + lr_final: 1e-4 + + # Number of epoch to reduce the learning rate from lr_init to lr_final + # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards) + # 0 means lr_final will apply immediately + # -1 means we take the current max_step / max_epoch as the period + lr_period: 1 + # lr_period type if its set, defaults to epoch + lr_period_type: epoch + + # Adam optimizer settings + # You probably want to leave this alone, unless you know what you are doing + beta1: 0.9 + beta2: 0.99 + adam_eps: 1.0e-08 + weight_decay: 0.01 + + # torch.set_float32_matmul_precision, used to optimize operations with tensor cores + # this should be set as null, for non cuda core GPUs + torch_set_float32_matmul_precision: 'high' + # torch_set_float32_matmul_precision: null + + # Segmented based learning, used to work around training of large context length + # beyond what can be supported by the current GPU vram architecture + # + # This is not 1:1 equivalent to the same training process with required vram + # as the training process is split into multiple segments, part by part. + # with limited learnings from the previous segment. + bptt_learning: true + + # Segmented range to performing backprop learning on + # 1 means to apply only for the last segment + # -1 means to apply for all segments + bptt_learning_range: -1 + +data: + # Skip the datapath setup + # ignored if using the preload_datapath.py + skip_datapath_setup: True + + # dataset_path for the prebuilt dataset, using HF `load_from_disk()` + # + # Use this if you have built your own dataset and saved it with `save_to_disk()` + # with source left as null. Other wise configure this to a directory which the + # dataset will be built and tokenized by the huggingface dataset process. + data_path: ../datapath/enwiki_10k-world-packing/ + + # Other wise provide the source path, which is used as huggingface dataset path + # this will be used to populate the dataset_path + # + # Use either the following + # - hugging face dataset + # - Directory path to a directory containing dataset files + # - Path to a single dataset file + # - hugging face dataset mode (ie: text,csv,etc - use data_dir, to configure the path then) + # - null + # + # If source is disabled, all other params, except data_path, is ignored + source: "teven/enwiki_10k" + # source: text + # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt + + # Use data_dir, if you are using source=text/json/etc + # this should be relative to the trainer script path + source_data_dir: null + + # After loading the dataset, split out test data used for unit-test, + # This process is skipped if the dataset includes a test split + # This process is skipped if set to zero + test_split: 0.1 + test_split_shuffle: true + + # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer + # If using a custom tokenizer, provide the tokenizer file path + # --- + tokenizer: world + + # Minimum / Maximum token size of the dataset to use + # useful for filtering out small noisy data samples from large datasets + # (eg. 
removal of small articles of less then 512 tokens from wikipedia) + # + # This is ignored, if set to -1 + min_token_size: 64 + max_token_size: 8192 + + # Rechunking of text dataset, this is done only when source is set as 'text' + # and will merge the various sentencees, into larger chunks up to the target size + # + # Defaults to 4096 + # + # This is ignored, if source is not set as text + # This is ignored, if set to zero + # --- + # text_rechunk_size: 4096 + + # Apply text rechunk to the dataset, even if its not a 'text' source + # This is done only after dataset filtering, and if source is not 'text' + # --- + # text_rechunk_force: true + + # Custom text column to use, useful for dataset with alternative training columns labels + # This is checked before multi column merging, default is null (disabled) + # eg: 'code' + # --- + # custom_text_key: 'code' + + # Multi Column merging process, default setting is used to support and merge + # "instruction", "input", "output", datasets. To disable set multi_column_keys to [] + # + # A minimum of 2 columns is required, with non empty data, for the merge to occur + # If no match is found, this will fallback to the default prompt/completion or text column, + # or throw an error if the default fallback is not found + # --- + # multi_column_keys: ['instruction', 'input', 'output'] + # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n'] + # multi_column_train_mask: [true, false, true] + # multi_column_separator: '\n\n' + + # If processing prompt/completion jsonl pairs, the prompt is masked by default + # use this flag to disable this default behaviour + # --- + # disable_prompt_completion_mask: false + + # ---------------------------- + # Dataset packing support + # Recommended to be used with mixed documents sized finetuning + # For foundation model "from scratch", rechunking is typically used instead + # ---------------------------- + + # Boolean flag to enable / disable dataset packing + packing_enable: True + + # Used to ensure all training samples wihin this batch size is the same length + # Ideally this should align exactly with your real "batch size" + # + # Uses, `8 * (3 * 4 * 5 * 6 * 7) = 20160` for default, as it should align across + # a large number of batch size combinations. This helps reduce the amount of + # misaligned batches, and thus reduce the amount of wasted training time. + packing_batchsize: 128 + + # Chunking size to align within each batch, this ideally should be equal to + # the training context length used. 
+ packing_chunksize: 2048 + + # Minimum size to pack up to, this should be a multiple of packing_chunksize + # defautls to -1, which equals to packing_chunksize + packing_min_ctx_len: 8192 + + # Pack the data sequentially if possible, in accordance to the dataset sequence + # this can be used together with sort_by_length + packing_in_sequence: False + +# Path to the current checkpoint to continue training from +# Enable this to the last checkpoint after the first run +# (if it crash and you want to resume) +# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt +ckpt_path: null diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_100k-world-packing.yaml b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_100k-world-packing.yaml new file mode 100644 index 00000000..6bbe3add --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_100k-world-packing.yaml @@ -0,0 +1,319 @@ +# lightning.pytorch==2.0.2 +seed_everything: 3941088705 +trainer: + + # + # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload` + # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful + # for training LoRA on large models on a single GPU. + # + # In general you would want to use the following: + # + # - deepspeed_stage_1 : Each of your GPU has too much vram, and you do not know what to do + # + # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple gpu each with sufficient vram + # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu + # + # - deepspeed_stage_3 : Split up the model across multiple gpu, useful for large models, at a performance cost + # - deepspeed_stage_3_offload : Additional offloading, for even greater performance cost + # + # For more details see: + # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2 + # + strategy: deepspeed_stage_2 + + # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section + # --- + logger: + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + name: 'infctx-v5-unit-test-baseline (train-ctx=1024, data-ctx=16k-packing)' + project: 'RWKV-X-SLoss' + tags: ['RWKV', 'infctx'] + + # Checkpoint settings for the training process + callbacks: + class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + # Configure this to the path you want to save your checkpoints to + # note that a subdir will be created with the name `epoch=x-step=y.ckpt` + # + # to convert a checkpoint to a model, you can use the + # `python3 export_checkpoint.py ` script, + # which will create a `rwkv_model.pth` in the checkpoint directory. + # + # Do not use the `zero_to_fp32.py` script as that will have export format issues + dirpath: ../checkpoint/selective-loss/sloss + filename: null + + # Save the top/last K checkpoints + save_top_k: 1 + # Choose by the most recent checkpoints (step based) + monitor: 'step' + mode: max + + # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt' + # useful to simply checkpoint resume scripts, at a price of disk performance + save_last: false + + # DO NOT set this as true, as the model weight exported will have format issues + # expert as checkpoint, and use the `export_checkpoint.py` script to convert to model instead + save_weights_only: false + + # How frequent you want to save a checkpoint for every step. 
+ # This will happen for every X data sample, where X = every_n_train_steps * accumulate_grad_batches + # + # In general you will want to avoid putting a low number (expecially if accumulate_grad_batches <= 100) + # as the checkpoint process, will pause all the gpu training for some time, slowing down the overall process + # However you do not want to configure too high of a number, where you will lose too much progress if the training crashes + every_n_train_steps: null + every_n_epochs: 1 + save_on_train_epoch_end: true + train_time_interval: null + + # Other settings, you can probably leave alone + verbose: false + auto_insert_metric_name: true + + ######################################## + ## Training run parameter settings + ######################################## + + # Generally what you want to configure is the maximum number of epochs + # Leave it as -1, and it will keep going forever till interrupted + # Or set it as a number, and it will stop after that number of epochs + max_epochs: -1 + min_epochs: null + max_steps: -1 + min_steps: null + max_time: null + + # Number of datasamples to train for each step, a data sample is considered + # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step" + # + # This decides the number of datasample, to learn together from, before backproping + # any weight changes at the end of the batch. + # + # Recommended to be a big enough number (like 128/256) where it prevents the training + # loss from flucuating in the process. But not too big of a number where the increased + # GPU vRAM / offloaded RAM usage will cause the training to crash. + # + # You are also recommended to configure this to a large enough number to fully utilize + # your GPU processing time %, and avoid idle time for the GPU between batches + # + # This number is divided by the number of GPUs, and nodes configured + # So if you have 4 GPUs, and 2 nodes, and this is configured as 128 + # Each GPU will process 128/4/2 = 16 datasamples per step, via accumulate_grad_batches + target_batch_size: 16 + +######################################## +## Training model settings +######################################## +model: + # Model to start the finetune/training process from + load_model: ../model/L24-D2048-world-v5base-init.pth + + # Context length to use for the training process + # the larger the number (and batch size) the larger the vram usage + # + # Note that if the datasample context length is larger then the ctx_len + # its training process would be split into ctx_len sized chunks. + # + # This allows the training of extreamly large context length (eg. 100k), + # without eating up too much vram by keeping the training context length + # to a resonable number sutible to the current GPU setup + ctx_len: 2048 + + # Data samples would be cut down to the respective max ctx_len_cutoffs + # values if its larger then ctx_len. If the data sample is larger then + # the largest len_cutoff, the remaining data will be discarded + ctx_len_cutoffs: [] + # Experimental settings, number of tokens to skip in the data sample + # prefix, for the respective cutoff length. 
Used to speed up the process + ctx_len_warmup_steps: [] + + # Learning rate of the training process + # --- + + # Initia learning rate of the process + lr_init: 1e-4 + # Final learning rate after the learning rate period + # learning rate will stay at final value from then onwards + lr_final: 1e-4 + + # Number of epoch to reduce the learning rate from lr_init to lr_final + # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards) + # 0 means lr_final will apply immediately + # -1 means we take the current max_step / max_epoch as the period + lr_period: 1 + # lr_period type if its set, defaults to epoch + lr_period_type: epoch + + # Adam optimizer settings + # You probably want to leave this alone, unless you know what you are doing + beta1: 0.9 + beta2: 0.99 + adam_eps: 1.0e-08 + weight_decay: 0.01 + + # torch.set_float32_matmul_precision, used to optimize operations with tensor cores + # this should be set as null, for non cuda core GPUs + torch_set_float32_matmul_precision: 'high' + # torch_set_float32_matmul_precision: null + + # Segmented based learning, used to work around training of large context length + # beyond what can be supported by the current GPU vram architecture + # + # This is not 1:1 equivalent to the same training process with required vram + # as the training process is split into multiple segments, part by part. + # with limited learnings from the previous segment. + bptt_learning: true + + # Segmented range to performing backprop learning on + # 1 means to apply only for the last segment + # -1 means to apply for all segments + bptt_learning_range: -1 + + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Skip tokens that are already learnt, and are below the target threshold + token_loss_threshold: 1.0 + # Perform token based drop out at random, to the target % rate + token_dropout_rate: 0.05 + +data: + # Skip the datapath setup + # ignored if using the preload_datapath.py + skip_datapath_setup: True + + # dataset_path for the prebuilt dataset, using HF `load_from_disk()` + # + # Use this if you have built your own dataset and saved it with `save_to_disk()` + # with source left as null. Other wise configure this to a directory which the + # dataset will be built and tokenized by the huggingface dataset process. 
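+ #
+ # A typical way to prebuild this directory (as done in the accompanying
+ # notebooks) is to run the preload script against this config, from the
+ # RWKV-v5 trainer directory, for example:
+ #
+ #   python3 preload_datapath.py ../notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_100k-world-packing.yaml
+ #
+ # (path shown is relative to the trainer directory and assumes the default
+ # repository layout) - the tokenized dataset is then written to data_path
+ # below, and reloaded with `load_from_disk()` on later runs.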
+ data_path: ../datapath/enwiki_100k-world-sloss-packing/ + + # Other wise provide the source path, which is used as huggingface dataset path + # this will be used to populate the dataset_path + # + # Use either the following + # - hugging face dataset + # - Directory path to a directory containing dataset files + # - Path to a single dataset file + # - hugging face dataset mode (ie: text,csv,etc - use data_dir, to configure the path then) + # - null + # + # If source is disabled, all other params, except data_path, is ignored + source: "teven/enwiki_100k" + # source: text + # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt + + # Use data_dir, if you are using source=text/json/etc + # this should be relative to the trainer script path + source_data_dir: null + + # After loading the dataset, split out test data used for unit-test, + # This process is skipped if the dataset includes a test split + # This process is skipped if set to zero + test_split: 0.05 + test_split_shuffle: true + + # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer + # If using a custom tokenizer, provide the tokenizer file path + # --- + tokenizer: world + + # Minimum / Maximum token size of the dataset to use + # useful for filtering out small noisy data samples from large datasets + # (eg. removal of small articles of less then 512 tokens from wikipedia) + # + # This is ignored, if set to -1 + min_token_size: 1024 + max_token_size: 8192 + + # Rechunking of text dataset, this is done only when source is set as 'text' + # and will merge the various sentencees, into larger chunks up to the target size + # + # Defaults to 4096 + # + # This is ignored, if source is not set as text + # This is ignored, if set to zero + # --- + # text_rechunk_size: 4096 + + # Apply text rechunk to the dataset, even if its not a 'text' source + # This is done only after dataset filtering, and if source is not 'text' + # --- + # text_rechunk_force: true + + # Custom text column to use, useful for dataset with alternative training columns labels + # This is checked before multi column merging, default is null (disabled) + # eg: 'code' + # --- + # custom_text_key: 'code' + + # Multi Column merging process, default setting is used to support and merge + # "instruction", "input", "output", datasets. To disable set multi_column_keys to [] + # + # A minimum of 2 columns is required, with non empty data, for the merge to occur + # If no match is found, this will fallback to the default prompt/completion or text column, + # or throw an error if the default fallback is not found + # --- + # multi_column_keys: ['instruction', 'input', 'output'] + # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n'] + # multi_column_train_mask: [true, false, true] + # multi_column_separator: '\n\n' + + # If processing prompt/completion jsonl pairs, the prompt is masked by default + # use this flag to disable this default behaviour + # --- + # disable_prompt_completion_mask: false + + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Prefix token masking + # + # The rationale behind this, is that the first X tokens should not be "backpropped" + # for any new training record. As its unfair to expect the model (or a human) make + # any resonable guesses at that stage. As such this is used to "mask" the first X tokens + # from the loss calculation, and thus not backpropped. 
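+ #
+ # A rough illustration of the intended effect (the actual masking is applied
+ # inside the data / loss pipeline): for a training sample of N tokens with
+ # data_prefix_skip_mask: 8, the per-token loss mask would look like
+ #
+ #   token index : 0 1 2 3 4 5 6 7 | 8 9 ... N-1
+ #   loss mask   : 0 0 0 0 0 0 0 0 | 1 1 ...  1
+ #
+ # i.e. the first 8 tokens are still fed through the model, but do not
+ # contribute to the loss or the backward pass.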
+ data_prefix_skip_mask: 8 + + # ---------------------------- + # Dataset packing support + # Recommended to be used with mixed documents sized finetuning + # For foundation model "from scratch", rechunking is typically used instead + # ---------------------------- + + # Boolean flag to enable / disable dataset packing + packing_enable: True + + # Used to ensure all training samples wihin this batch size is the same length + # Ideally this should align exactly with your real "batch size" + # + # Uses, `8 * (3 * 4 * 5 * 6 * 7) = 20160` for default, as it should align across + # a large number of batch size combinations. This helps reduce the amount of + # misaligned batches, and thus reduce the amount of wasted training time. + packing_batchsize: 128 + + # Chunking size to align within each batch, this ideally should be equal to + # the training context length used. + packing_chunksize: 2048 + + # Minimum size to pack up to, this should be a multiple of packing_chunksize + # defautls to -1, which equals to packing_chunksize + packing_min_ctx_len: 8192 + + # Pack the data sequentially if possible, in accordance to the dataset sequence + # this can be used together with sort_by_length + packing_in_sequence: False + +# Path to the current checkpoint to continue training from +# Enable this to the last checkpoint after the first run +# (if it crash and you want to resume) +# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt +ckpt_path: null diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_10k-world-packing.yaml b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_10k-world-packing.yaml new file mode 100644 index 00000000..b18604c1 --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/config/1B5-sloss-enwiki_10k-world-packing.yaml @@ -0,0 +1,319 @@ +# lightning.pytorch==2.0.2 +seed_everything: 3941088705 +trainer: + + # + # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload` + # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful + # for training LoRA on large models on a single GPU. 
+ # + # In general you would want to use the following: + # + # - deepspeed_stage_1 : Each of your GPU has too much vram, and you do not know what to do + # + # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple gpu each with sufficient vram + # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu + # + # - deepspeed_stage_3 : Split up the model across multiple gpu, useful for large models, at a performance cost + # - deepspeed_stage_3_offload : Additional offloading, for even greater performance cost + # + # For more details see: + # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2 + # + strategy: deepspeed_stage_2 + + # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section + # --- + logger: + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + name: 'infctx-v5-unit-test-baseline (train-ctx=1024, data-ctx=16k-packing)' + project: 'RWKV-X-SLoss' + tags: ['RWKV', 'infctx'] + + # Checkpoint settings for the training process + callbacks: + class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + # Configure this to the path you want to save your checkpoints to + # note that a subdir will be created with the name `epoch=x-step=y.ckpt` + # + # to convert a checkpoint to a model, you can use the + # `python3 export_checkpoint.py ` script, + # which will create a `rwkv_model.pth` in the checkpoint directory. + # + # Do not use the `zero_to_fp32.py` script as that will have export format issues + dirpath: ../checkpoint/selective-loss/sloss + filename: null + + # Save the top/last K checkpoints + save_top_k: 1 + # Choose by the most recent checkpoints (step based) + monitor: 'step' + mode: max + + # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt' + # useful to simply checkpoint resume scripts, at a price of disk performance + save_last: false + + # DO NOT set this as true, as the model weight exported will have format issues + # expert as checkpoint, and use the `export_checkpoint.py` script to convert to model instead + save_weights_only: false + + # How frequent you want to save a checkpoint for every step. 
+ # This will happen for every X data sample, where X = every_n_train_steps * accumulate_grad_batches + # + # In general you will want to avoid putting a low number (expecially if accumulate_grad_batches <= 100) + # as the checkpoint process, will pause all the gpu training for some time, slowing down the overall process + # However you do not want to configure too high of a number, where you will lose too much progress if the training crashes + every_n_train_steps: null + every_n_epochs: 1 + save_on_train_epoch_end: true + train_time_interval: null + + # Other settings, you can probably leave alone + verbose: false + auto_insert_metric_name: true + + ######################################## + ## Training run parameter settings + ######################################## + + # Generally what you want to configure is the maximum number of epochs + # Leave it as -1, and it will keep going forever till interrupted + # Or set it as a number, and it will stop after that number of epochs + max_epochs: -1 + min_epochs: null + max_steps: -1 + min_steps: null + max_time: null + + # Number of datasamples to train for each step, a data sample is considered + # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step" + # + # This decides the number of datasample, to learn together from, before backproping + # any weight changes at the end of the batch. + # + # Recommended to be a big enough number (like 128/256) where it prevents the training + # loss from flucuating in the process. But not too big of a number where the increased + # GPU vRAM / offloaded RAM usage will cause the training to crash. + # + # You are also recommended to configure this to a large enough number to fully utilize + # your GPU processing time %, and avoid idle time for the GPU between batches + # + # This number is divided by the number of GPUs, and nodes configured + # So if you have 4 GPUs, and 2 nodes, and this is configured as 128 + # Each GPU will process 128/4/2 = 16 datasamples per step, via accumulate_grad_batches + target_batch_size: 16 + +######################################## +## Training model settings +######################################## +model: + # Model to start the finetune/training process from + load_model: ../model/L24-D2048-world-v5base-init.pth + + # Context length to use for the training process + # the larger the number (and batch size) the larger the vram usage + # + # Note that if the datasample context length is larger then the ctx_len + # its training process would be split into ctx_len sized chunks. + # + # This allows the training of extreamly large context length (eg. 100k), + # without eating up too much vram by keeping the training context length + # to a resonable number sutible to the current GPU setup + ctx_len: 2048 + + # Data samples would be cut down to the respective max ctx_len_cutoffs + # values if its larger then ctx_len. If the data sample is larger then + # the largest len_cutoff, the remaining data will be discarded + ctx_len_cutoffs: [] + # Experimental settings, number of tokens to skip in the data sample + # prefix, for the respective cutoff length. 
Used to speed up the process + ctx_len_warmup_steps: [] + + # Learning rate of the training process + # --- + + # Initia learning rate of the process + lr_init: 1e-4 + # Final learning rate after the learning rate period + # learning rate will stay at final value from then onwards + lr_final: 1e-4 + + # Number of epoch to reduce the learning rate from lr_init to lr_final + # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards) + # 0 means lr_final will apply immediately + # -1 means we take the current max_step / max_epoch as the period + lr_period: 1 + # lr_period type if its set, defaults to epoch + lr_period_type: epoch + + # Adam optimizer settings + # You probably want to leave this alone, unless you know what you are doing + beta1: 0.9 + beta2: 0.99 + adam_eps: 1.0e-08 + weight_decay: 0.01 + + # torch.set_float32_matmul_precision, used to optimize operations with tensor cores + # this should be set as null, for non cuda core GPUs + torch_set_float32_matmul_precision: 'high' + # torch_set_float32_matmul_precision: null + + # Segmented based learning, used to work around training of large context length + # beyond what can be supported by the current GPU vram architecture + # + # This is not 1:1 equivalent to the same training process with required vram + # as the training process is split into multiple segments, part by part. + # with limited learnings from the previous segment. + bptt_learning: true + + # Segmented range to performing backprop learning on + # 1 means to apply only for the last segment + # -1 means to apply for all segments + bptt_learning_range: -1 + + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Skip tokens that are already learnt, and are below the target threshold + token_loss_threshold: 1.0 + # Perform token based drop out at random, to the target % rate + token_dropout_rate: 0.05 + +data: + # Skip the datapath setup + # ignored if using the preload_datapath.py + skip_datapath_setup: True + + # dataset_path for the prebuilt dataset, using HF `load_from_disk()` + # + # Use this if you have built your own dataset and saved it with `save_to_disk()` + # with source left as null. Other wise configure this to a directory which the + # dataset will be built and tokenized by the huggingface dataset process. 
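+ #
+ # If you have already tokenized and saved your own dataset with HF
+ # `save_to_disk()`, a minimal (hypothetical) configuration would simply point
+ # data_path at that directory and leave source as null, for example:
+ #
+ #   data_path: ../datapath/my-prebuilt-dataset/
+ #   source: null
+ #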
+ data_path: ../datapath/enwiki_10k-world-sloss-packing/ + + # Other wise provide the source path, which is used as huggingface dataset path + # this will be used to populate the dataset_path + # + # Use either the following + # - hugging face dataset + # - Directory path to a directory containing dataset files + # - Path to a single dataset file + # - hugging face dataset mode (ie: text,csv,etc - use data_dir, to configure the path then) + # - null + # + # If source is disabled, all other params, except data_path, is ignored + source: "teven/enwiki_10k" + # source: text + # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt + + # Use data_dir, if you are using source=text/json/etc + # this should be relative to the trainer script path + source_data_dir: null + + # After loading the dataset, split out test data used for unit-test, + # This process is skipped if the dataset includes a test split + # This process is skipped if set to zero + test_split: 0.1 + test_split_shuffle: true + + # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer + # If using a custom tokenizer, provide the tokenizer file path + # --- + tokenizer: world + + # Minimum / Maximum token size of the dataset to use + # useful for filtering out small noisy data samples from large datasets + # (eg. removal of small articles of less then 512 tokens from wikipedia) + # + # This is ignored, if set to -1 + min_token_size: 64 + max_token_size: 8192 + + # Rechunking of text dataset, this is done only when source is set as 'text' + # and will merge the various sentencees, into larger chunks up to the target size + # + # Defaults to 4096 + # + # This is ignored, if source is not set as text + # This is ignored, if set to zero + # --- + # text_rechunk_size: 4096 + + # Apply text rechunk to the dataset, even if its not a 'text' source + # This is done only after dataset filtering, and if source is not 'text' + # --- + # text_rechunk_force: true + + # Custom text column to use, useful for dataset with alternative training columns labels + # This is checked before multi column merging, default is null (disabled) + # eg: 'code' + # --- + # custom_text_key: 'code' + + # Multi Column merging process, default setting is used to support and merge + # "instruction", "input", "output", datasets. To disable set multi_column_keys to [] + # + # A minimum of 2 columns is required, with non empty data, for the merge to occur + # If no match is found, this will fallback to the default prompt/completion or text column, + # or throw an error if the default fallback is not found + # --- + # multi_column_keys: ['instruction', 'input', 'output'] + # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n'] + # multi_column_train_mask: [true, false, true] + # multi_column_separator: '\n\n' + + # If processing prompt/completion jsonl pairs, the prompt is masked by default + # use this flag to disable this default behaviour + # --- + # disable_prompt_completion_mask: false + + # ---------------------------- + # Selective loss training + # ---------------------------- + + # Prefix token masking + # + # The rationale behind this, is that the first X tokens should not be "backpropped" + # for any new training record. As its unfair to expect the model (or a human) make + # any resonable guesses at that stage. As such this is used to "mask" the first X tokens + # from the loss calculation, and thus not backpropped. 
+ data_prefix_skip_mask: 8 + + # ---------------------------- + # Dataset packing support + # Recommended to be used with mixed documents sized finetuning + # For foundation model "from scratch", rechunking is typically used instead + # ---------------------------- + + # Boolean flag to enable / disable dataset packing + packing_enable: True + + # Used to ensure all training samples wihin this batch size is the same length + # Ideally this should align exactly with your real "batch size" + # + # Uses, `8 * (3 * 4 * 5 * 6 * 7) = 20160` for default, as it should align across + # a large number of batch size combinations. This helps reduce the amount of + # misaligned batches, and thus reduce the amount of wasted training time. + packing_batchsize: 128 + + # Chunking size to align within each batch, this ideally should be equal to + # the training context length used. + packing_chunksize: 2048 + + # Minimum size to pack up to, this should be a multiple of packing_chunksize + # defautls to -1, which equals to packing_chunksize + packing_min_ctx_len: 8192 + + # Pack the data sequentially if possible, in accordance to the dataset sequence + # this can be used together with sort_by_length + packing_in_sequence: False + +# Path to the current checkpoint to continue training from +# Enable this to the last checkpoint after the first run +# (if it crash and you want to resume) +# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt +ckpt_path: null diff --git a/notebook/rwkv-x-exp/v5-exp/selective-loss/sloss-run-100k.ipynb b/notebook/rwkv-x-exp/v5-exp/selective-loss/sloss-run-100k.ipynb index 3593220a..91c370be 100644 --- a/notebook/rwkv-x-exp/v5-exp/selective-loss/sloss-run-100k.ipynb +++ b/notebook/rwkv-x-exp/v5-exp/selective-loss/sloss-run-100k.ipynb @@ -11,9 +11,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" + ] + } + ], "source": [ "GPU_DEVICES=\"auto\"\n", "ENABLE_WANDB=True\n", @@ -45,9 +57,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-19 05:28:33,486] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "---- Initializing model ----\n", + "No of layers: 8\n", + "Embedding size: 512\n", + "Output model path: ../model/L8-D512-world-v5-init.pth\n", + "Vocab size: 65536\n", + "Emb scale: 0.0001\n", + "Note: this process takes a significant time (and ram) for large models\n", + "---- ----- ----\n", + "Model exists, skipping init_model\n" + ] + } + ], "source": [ "# Lets initialized the model with the init_model.py code\n", "!cd \"{TRAINER_DIR}\" && python3 init_model.py \\\n", @@ -59,9 +89,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Map (num_proc=255): 100%|███| 1000000/1000000 [00:38<00:00, 25944.52 examples/s]\n", + "Filter 
(num_proc=255): 100%|█| 1000000/1000000 [00:16<00:00, 60006.93 examples/s\n", + "Map (num_proc=255): 100%|██████| 120800/120800 [00:16<00:00, 7317.51 examples/s]\n", + "Map (num_proc=255): 100%|██████| 120800/120800 [00:18<00:00, 6598.04 examples/s]\n", + "Saving the dataset (4/4 shards): 100%|█| 18201/18201 [00:01<00:00, 12987.11 exam\n", + "Saving the dataset (1/1 shards): 100%|█| 13423/13423 [00:00<00:00, 40290.59 exam\n" + ] + } + ], "source": [ "# Lets preload the requried dataset \n", "!cd \"{TRAINER_DIR}\" && \\\n", @@ -79,7 +122,38556 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-19 05:33:08,562] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/sloss-enwiki_100k-world-16k-packing.yaml', '--model.load_model=../model/L8-D512-world-v5-init.pth', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - Selective Loss (trainsize=2k,packsize=16k) - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--trainer.devices=auto'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/selective-loss/config/sloss-enwiki_100k-world-16k-packing.yaml', '--model.load_model=../model/L8-D512-world-v5-init.pth', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=../checkpoint/selective-loss/baseline/', '--trainer.logger.init_args.name=infctx-v5-selective-loss - Selective Loss (trainsize=2k,packsize=16k) - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.target_batch_size=128', '--trainer.microbatch_size=8', '--trainer.devices=auto'].\n", + "Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 128\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 2\n", + " - effective_batch_size: 128\n", + "\n", + "[rank: 0] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-19 05:33:17,448] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:17,449] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:17,449] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:17,450] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:18,395] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:18,409] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-19 05:33:18,450] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 1] Seed set to 3941088705\n", + "[rank: 6] Seed set to 3941088705\n", + "[rank: 5] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 4] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 7] Seed set to 3941088705\n", + "[rank: 3] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 2] Seed set to 3941088705\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 6] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 4] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 1] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 5] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "[rank: 2] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 7] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 3] Seed set to 3941088705\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240119_053338-0tw1sln7\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33minfctx-v5-selective-loss - Selective Loss (trainsize=2k,packsize=16k) - (deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-SLoss/runs/0tw1sln7\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 3.000e-04 (0.0003)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a 
performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Creating extension directory /root/.cache/torch_extensions/py310_cu121/fused_adam...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "[1/3] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/includes -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam -isystem /usr/local/lib/python3.10/dist-packages/torch/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.10/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_89,code=compute_89 -gencode=arch=compute_89,code=sm_89 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_89,code=sm_89 -gencode=arch=compute_89,code=compute_89 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", + "[2/3] c++ -MMD -MF fused_adam_frontend.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/includes -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam -isystem /usr/local/lib/python3.10/dist-packages/torch/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.10/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DBF16_AVAILABLE -c /usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam/fused_adam_frontend.cpp -o 
fused_adam_frontend.o \n", + "[3/3] c++ fused_adam_frontend.o multi_tensor_adam.cuda.o -shared -L/usr/local/lib/python3.10/dist-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart -o fused_adam.so\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 21.555976390838623 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 21.641409158706665 seconds\n", + "Time to load fused_adam op: 21.641443490982056 seconds\n", + "Time to load fused_adam op: 21.64131236076355 seconds\n", + "Time to load fused_adam op: 21.64161515235901 seconds\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 21.640255212783813 seconds\n", + "Time to load fused_adam op: 21.6422917842865 seconds\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 21.64272451400757 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 33.6 M\n", + "1 | blocks | ModuleList | 27.3 M\n", + "2 | ln_out | LayerNorm | 1.0 K \n", + "3 | head | Linear | 33.6 M\n", + "--------------------------------------\n", + "94.4 M Trainable params\n", + "0 Non-trainable params\n", + "94.4 M Total params\n", + "377.725 Total estimated model params size (MB)\n", + "Epoch 0: 100%|██| 285/285 [06:40<00:00, 0.71it/s, v_num=sln7, train/loss=5.280]\n", + "Validation: | | 0/? [00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch 
extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.06023287773132324 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10158467292785645 seconds\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.1016077995300293 seconds/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10178995132446289 secondsTime to load fused_adam op: 0.10172677040100098 seconds\n", + "\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10198116302490234 seconds\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10247421264648438 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10244083404541016 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 33.6 M\n", + "1 | blocks | ModuleList | 20.5 M\n", + "2 | ln_out | LayerNorm | 1.0 K \n", + "3 | head | Linear | 33.6 M\n", + "--------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 91%|███▋| 10/11 [00:11<00:01, 0.85it/s, v_num=bekp, train/loss=7.560]/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|████| 11/11 [00:13<00:00, 0.81it/s, v_num=bekp, train/loss=7.660]\n", + "Validation: | | 0/? 
[00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.06238269805908203 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10197782516479492 seconds\n", + "Time to load fused_adam op: 0.10209941864013672 secondsLoading extension module fused_adam...\n", + "\n", + "Loading extension module fused_adam...\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10231184959411621 seconds\n", + "Time to load fused_adam op: 0.10195589065551758 seconds\n", + "Time to load fused_adam op: 0.10258698463439941 seconds\n", + "Time to load fused_adam op: 0.10204219818115234 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.1022348403930664 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 33.6 M\n", + "1 | blocks | ModuleList | 20.5 M\n", + "2 | ln_out | LayerNorm | 1.0 K \n", + "3 | head | Linear | 33.6 M\n", + "--------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 91%|███▋| 10/11 [00:11<00:01, 0.85it/s, v_num=lsx7, train/loss=7.560]/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Epoch 0: 100%|████| 11/11 [00:13<00:00, 0.81it/s, v_num=lsx7, train/loss=7.620]\n", + "Validation: | | 0/? [00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Creating extension directory /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using 
/home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam/build.ninja...\n", + "Building extension module cpu_adam...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "[1/4] /home/recursal/miniconda3/envs/rwkv-infctx/bin/nvcc -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/includes -I/home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/TH -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/THC -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_89,code=compute_89 -gencode=arch=compute_89,code=sm_89 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++17 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_89,code=sm_89 -gencode=arch=compute_89,code=compute_89 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -c /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/common/custom_cuda_kernel.cu -o custom_cuda_kernel.cuda.o \n", + "[2/4] c++ -MMD -MF cpu_adam_impl.o.d -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/includes -I/home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/TH -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/THC -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -L/home/recursal/miniconda3/envs/rwkv-infctx/lib -lcudart -lcublas -g -march=native -fopenmp -D__AVX256__ -D__ENABLE_CUDA__ -DBF16_AVAILABLE -c /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/adam/cpu_adam_impl.cpp -o cpu_adam_impl.o \n", + "[3/4] c++ -MMD -MF cpu_adam.o.d -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/includes -I/home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include -isystem 
/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/TH -isystem /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/include/THC -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include -isystem /home/recursal/miniconda3/envs/rwkv-infctx/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -L/home/recursal/miniconda3/envs/rwkv-infctx/lib -lcudart -lcublas -g -march=native -fopenmp -D__AVX256__ -D__ENABLE_CUDA__ -DBF16_AVAILABLE -c /home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/csrc/adam/cpu_adam.cpp -o cpu_adam.o \n", + "[4/4] c++ cpu_adam.o cpu_adam_impl.o custom_cuda_kernel.cuda.o -shared -lcurand -L/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/home/recursal/miniconda3/envs/rwkv-infctx/lib -lcudart -o cpu_adam.so\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.55697989463806 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.60743737220764 seconds\n", + "Loading extension module cpu_adam...\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.604738235473633 seconds\n", + "Time to load cpu_adam op: 29.611495971679688 seconds\n", + "Loading extension module cpu_adam...\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.617934703826904 seconds\n", + "Time to load cpu_adam op: 29.6148624420166 seconds\n", + "Loading extension module cpu_adam...\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 29.61309814453125 seconds\n", + "Time to load cpu_adam op: 29.62095355987549 seconds\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 33.6 M\n", + "1 | blocks | ModuleList | 20.5 M\n", + "2 | ln_out | LayerNorm | 1.0 K \n", + "3 | head | Linear | 33.6 M\n", + "--------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 0%| | 0/11 [00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.05372977256774902 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...Loading extension module fused_adam...\n", + "\n", + "Time to load fused_adam op: 0.10151505470275879 seconds\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam.../home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + "Time to load fused_adam op: 0.10130453109741211 seconds\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10213708877563477 seconds\n", + "Time to load fused_adam op: 0.10188078880310059 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10168170928955078 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10216069221496582 seconds\n", + "Time to load fused_adam op: 0.10223817825317383 seconds\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Parameter Offload: Total persistent parameters: 45056 in 88 params\n", + "\n", + " | Name | Type | Params | Params per Device\n", + "----------------------------------------------------------\n", + "0 | emb | Embedding | 33.6 M | 4.2 M \n", + "1 | blocks | ModuleList | 20.5 M | 2.6 M \n", + "2 | ln_out | LayerNorm | 1.0 K | 128 \n", + "3 | head | Linear | 33.6 M | 4.2 M \n", + "----------------------------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 0%| | 0/11 [00:00) and the tensors embedded in it cannot be detected. 
The ZeRO-3 hooks designed to trigger before or after backward pass of the module relies on knowing the input and output tensors and therefore may not get triggered properly.\n", + "Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 9%|▍ | 1/11 [00:02<00:22, 0.45it/s, v_num=9uto, train/loss=11.20]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 18%|▉ | 2/11 [00:03<00:16, 0.53it/s, v_num=9uto, train/loss=10.30]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 27%|█▎ | 3/11 [00:05<00:14, 0.57it/s, v_num=9uto, train/loss=9.810]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 36%|█▊ | 4/11 [00:06<00:11, 0.59it/s, v_num=9uto, train/loss=9.380]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 45%|██▎ | 5/11 [00:08<00:09, 0.60it/s, v_num=9uto, train/loss=8.940]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 55%|██▋ | 6/11 [00:09<00:08, 0.61it/s, v_num=9uto, train/loss=8.810]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 64%|███▏ | 7/11 [00:11<00:06, 0.62it/s, v_num=9uto, train/loss=8.440]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 73%|███▋ | 8/11 [00:12<00:04, 0.62it/s, v_num=9uto, train/loss=8.120]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 82%|████ | 9/11 [00:14<00:03, 0.63it/s, v_num=9uto, train/loss=8.000]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 91%|███▋| 10/11 [00:15<00:01, 0.63it/s, v_num=9uto, train/loss=7.720]/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/strategies/deepspeed.py:758: When saving the DeepSpeed Stage 3 checkpoint, each worker will save a shard of the checkpoint within a directory. If a single file is required after training, see https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-3-single-file for instructions.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. 
Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 100%|████| 11/11 [00:17<00:00, 0.62it/s, v_num=9uto, train/loss=7.750]\n", + "Validation: | | 0/? 
[00:00=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 8.000e-04 (0.0008)\n", + " - lr_final: 4.000e-04 (0.0004)\n", + "\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "[WARNING]: unlimited bptt_learning_range across multiple GPU's has a performance penalty with datasets of mixed sizes due to its constant need to keep all GPU's in sync (consider using bptt_learning_range=1 instead)\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam/build.ninja...\n", + "Building extension module cpu_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.4077200889587402 seconds\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam/build.ninja...\n", + "Building extension module cpu_adam...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "ninja: no work to do.\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.4514763355255127 seconds\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Installed CUDA version 12.3 does not match the version torch was compiled with 12.1 but since the APIs are compatible, accepting this combination\n", + "Using /home/recursal/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /home/recursal/.cache/torch_extensions/py311_cu121/cpu_adam/build.ninja...\n", + "Building extension module cpu_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.514955759048462 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5427651405334473 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5546679496765137 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5623202323913574 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5747101306915283 seconds\n", + "Loading extension module cpu_adam...\n", + "Time to load cpu_adam op: 2.5695078372955322 seconds\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (11) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "Parameter Offload: Total persistent parameters: 45056 in 88 params\n", + "\n", + " | Name | Type | Params | Params per Device\n", + "----------------------------------------------------------\n", + "0 | emb | Embedding | 33.6 M | 4.2 M \n", + "1 | blocks | ModuleList | 20.5 M | 2.6 M \n", + "2 | ln_out | LayerNorm | 1.0 K | 128 \n", + "3 | head | Linear | 33.6 M | 4.2 M \n", + "----------------------------------------------------------\n", + "87.6 M Trainable params\n", + "0 Non-trainable params\n", + "87.6 M Total params\n", + "350.405 Total estimated model params size (MB)\n", + "Epoch 0: 0%| | 0/11 [00:00) and the tensors embedded in it cannot be detected. The ZeRO-3 hooks designed to trigger before or after backward pass of the module relies on knowing the input and output tensors and therefore may not get triggered properly.\n", + "Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 9%|▍ | 1/11 [00:02<00:26, 0.37it/s, v_num=jhog, train/loss=11.20]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 18%|▉ | 2/11 [00:04<00:20, 0.44it/s, v_num=jhog, train/loss=10.30]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 27%|█▎ | 3/11 [00:06<00:17, 0.47it/s, v_num=jhog, train/loss=9.810]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 36%|█▊ | 4/11 [00:08<00:14, 0.48it/s, v_num=jhog, train/loss=9.380]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 45%|██▎ | 5/11 [00:10<00:12, 0.49it/s, v_num=jhog, train/loss=8.940]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 55%|██▋ | 6/11 [00:11<00:09, 0.50it/s, v_num=jhog, train/loss=8.810]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 64%|███▏ | 7/11 [00:13<00:07, 0.50it/s, v_num=jhog, train/loss=8.500]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 73%|███▋ | 8/11 [00:15<00:05, 0.51it/s, v_num=jhog, train/loss=8.120]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 82%|████ | 9/11 [00:17<00:03, 0.51it/s, v_num=jhog, train/loss=8.000]Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 91%|███▋| 10/11 [00:19<00:01, 0.52it/s, v_num=jhog, train/loss=7.720]/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/strategies/deepspeed.py:758: When saving the DeepSpeed Stage 3 checkpoint, each worker will save a shard of the checkpoint within a directory. 
If a single file is required after training, see https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-3-single-file for instructions.\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n", + " warnings.warn(\n", + "Invalidate trace cache @ step 1: expected module 0, but got module 1\n", + "Invalidate trace cache @ step 90: expected module 89, but got module 0\n", + "Epoch 0: 100%|████| 11/11 [00:21<00:00, 0.51it/s, v_num=jhog, train/loss=7.750]\n", + "Validation: | | 0/? 
[00:00] 14.00G 13.5MB/s in 16m 12s \n", + "\n", + "2024-01-20 06:05:14 (14.8 MB/s) - ‘RWKV-5-World-7B-v2-OnlyForTest_72%_trained-20231204-ctx4096.pth?download=true’ saved [15036197229/15036197229]\n", + "\n" + ] + } + ], + "source": [ + "# Lets wget the model files\n", + "!mkdir -p \"{PROJECT_DIR}/model\"\n", + "!cd \"{PROJECT_DIR}/model\" && \\\n", + " wget -O \"{MODEL_NAME}\" -nc \"https://huggingface.co/BlinkDL/temp/resolve/2d905a2a30c778086a048e4f65ca75d9f7f9849d/RWKV-5-World-7B-v2-OnlyForTest_72%25_trained-20231204-ctx4096.pth?download=true\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Map (num_proc=160): 100%|███| 1000000/1000000 [00:10<00:00, 95906.26 examples/s]\n", + "Filter (num_proc=160): 100%|█| 1000000/1000000 [00:05<00:00, 179927.28 examples/\n", + "Map (num_proc=160): 100%|█████| 120800/120800 [00:03<00:00, 37916.96 examples/s]\n", + "Map (num_proc=160): 100%|█████| 120800/120800 [00:05<00:00, 21204.90 examples/s]\n", + "Saving the dataset (4/4 shards): 100%|█| 18147/18147 [00:04<00:00, 4124.48 examp\n", + "Saving the dataset (1/1 shards): 100%|█| 13423/13423 [00:00<00:00, 25885.58 exam\n" + ] + } + ], + "source": [ + "# Lets preload the requried dataset \n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-packing.yaml\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Actual training run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-packing.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --data.skip_datapath_setup=True \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/v5-7b-benchmark/baseline/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - 7B - Baseline (packsize=16k, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.target_batch_size=640 \\\n", + " --trainer.microbatch_size=10 \\\n", + " --model.ctx_len=4096 \\\n", + " --trainer.devices=\"{GPU_DEVICES}\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "rwkv-infctx", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebook/trainer-v5-validation/config/enwiki_100k-world-16k-packing.yaml b/notebook/trainer-v5-validation/config/enwiki_100k-world-16k-packing.yaml new file mode 100644 index 00000000..31ea7eed --- /dev/null +++ b/notebook/trainer-v5-validation/config/enwiki_100k-world-16k-packing.yaml @@ -0,0 +1,298 @@ +# lightning.pytorch==2.0.2 +seed_everything: 3941088705 +trainer: + + # + # Configure the deepspeed strategy, we recommend you start with `deepspeed_stage_2_offload` + # and adjust from there according to your training needs. `deepspeed_stage_3_offload` is useful + # for training LoRA on large models on a single GPU. 
+  #
+  # In general you would want to use the following:
+  #
+  # - deepspeed_stage_1 : Each of your GPUs has too much vram, and you do not know what to do
+  #
+  # - deepspeed_stage_2 : Optimal distributed training strategy, across multiple GPUs each with sufficient vram
+  # - deepspeed_stage_2_offload : Reduce vram usage by offloading the optimizer state and work to cpu
+  #
+  # - deepspeed_stage_3 : Split up the model across multiple GPUs, useful for large models, at a performance cost
+  # - deepspeed_stage_3_offload : Additional offloading, for an even greater performance cost
+  #
+  # For more details see:
+  # https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed-zero-stage-2
+  #
+  strategy: deepspeed_stage_2
+
+  # Logger setting for wandb, if you want to enable wandb, uncomment the whole logger section
+  # ---
+  logger:
+    class_path: lightning.pytorch.loggers.WandbLogger
+    init_args:
+      name: 'infctx-v5-unit-test-baseline (train-ctx=4096, data-ctx=16k-packing)'
+      project: 'RWKV-X-SLoss'
+      tags: ['RWKV', 'infctx']
+
+  # Checkpoint settings for the training process
+  callbacks:
+    class_path: lightning.pytorch.callbacks.ModelCheckpoint
+    init_args:
+      # Configure this to the path you want to save your checkpoints to
+      # note that a subdir will be created with the name `epoch=x-step=y.ckpt`
+      #
+      # to convert a checkpoint to a model, you can use the
+      # `python3 export_checkpoint.py ` script,
+      # which will create a `rwkv_model.pth` in the checkpoint directory.
+      #
+      # Do not use the `zero_to_fp32.py` script as that will have export format issues
+      dirpath: ../checkpoint/selective-loss/baseline
+      filename: null
+
+      # Save the top/last K checkpoints
+      save_top_k: 3
+      # Choose by the most recent checkpoints (step based)
+      monitor: 'step'
+      mode: max
+
+      # If enabled (true), save a copy of the latest checkpoint to 'last.ckpt'
+      # useful to simplify checkpoint resume scripts, at the price of some disk performance
+      save_last: false
+
+      # DO NOT set this as true, as the exported model weights will have format issues
+      # export as a checkpoint, and use the `export_checkpoint.py` script to convert it to a model instead
+      save_weights_only: false
+
+      # How frequently you want to save a checkpoint, in steps.
+      # This will happen for every X data samples, where X = every_n_train_steps * accumulate_grad_batches
+      #
+      # In general you will want to avoid putting a low number (especially if accumulate_grad_batches <= 100)
+      # as the checkpoint process will pause all GPU training for some time, slowing down the overall process.
+      # However, you do not want to configure too high a number either, or you will lose too much progress if the training crashes.
+      every_n_train_steps: null
+      every_n_epochs: 1
+      save_on_train_epoch_end: true
+      train_time_interval: null
+
+      # Other settings, you can probably leave alone
+      verbose: false
+      auto_insert_metric_name: true
+
+  ########################################
+  ## Training run parameter settings
+  ########################################
+
+  # Generally what you want to configure is the maximum number of epochs
+  # Leave it as -1, and it will keep going forever till interrupted
+  # Or set it as a number, and it will stop after that number of epochs
+  max_epochs: -1
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  max_time: null
+
+  # Number of datasamples to train on for each step; a data sample is considered
+  # a "substep" in wandb logs, and a "step" is tracked as "trainer/global_step"
+  #
+  # This decides the number of datasamples to learn from together, before backpropagating
+  # any weight changes at the end of the batch.
+  #
+  # Recommended to be a big enough number (like 128/256) so that it prevents the training
+  # loss from fluctuating in the process. But not so big a number that the increased
+  # GPU vRAM / offloaded RAM usage will cause the training to crash.
+  #
+  # You are also recommended to configure this to a large enough number to fully utilize
+  # your GPU processing time %, and avoid idle time for the GPU between batches
+  #
+  # This number is divided by the number of GPUs and nodes configured
+  # So if you have 4 GPUs, and 2 nodes, and this is configured as 128,
+  # each GPU will process 128/4/2 = 16 datasamples per step, via accumulate_grad_batches
+  target_batch_size: 16
+
+########################################
+## Training model settings
+########################################
+model:
+  # Model to start the finetune/training process from
+  load_model: ../model/L24-D2048-world-v5base-init.pth
+
+  # Context length to use for the training process
+  # the larger the number (and batch size), the larger the vram usage
+  #
+  # Note that if the datasample context length is larger than the ctx_len,
+  # its training process will be split into ctx_len sized chunks.
+  #
+  # This allows the training of extremely large context lengths (eg. 100k),
+  # without eating up too much vram, by keeping the training context length
+  # to a reasonable number suitable for the current GPU setup
+  ctx_len: 2048
+
+  # Data samples will be cut down to the respective max ctx_len_cutoffs
+  # values if they are larger than ctx_len. If the data sample is larger than
+  # the largest len_cutoff, the remaining data will be discarded
+  ctx_len_cutoffs: []
+  # Experimental setting: number of tokens to skip in the data sample
+  # prefix, for the respective cutoff length.
+  # Used to speed up the process
+  ctx_len_warmup_steps: []
+
+  # Learning rate of the training process
+  # ---
+
+  # Initial learning rate of the process
+  lr_init: 8e-4
+  # Final learning rate after the learning rate period
+  # learning rate will stay at the final value from then onwards
+  lr_final: 3e-4
+
+  # Number of epochs to reduce the learning rate from lr_init to lr_final
+  # 1 means a single epoch (so lr would be lr_final from epoch 2 onwards)
+  # 0 means lr_final will apply immediately
+  # -1 means we take the current max_step / max_epoch as the period
+  lr_period: 2
+  # lr_period type if it is set, defaults to epoch
+  lr_period_type: epoch
+
+  # Adam optimizer settings
+  # You probably want to leave this alone, unless you know what you are doing
+  beta1: 0.9
+  beta2: 0.99
+  adam_eps: 1.0e-08
+  weight_decay: 0.01
+
+  # torch.set_float32_matmul_precision, used to optimize operations with tensor cores
+  # this should be set as null for GPUs without tensor cores
+  torch_set_float32_matmul_precision: 'high'
+  # torch_set_float32_matmul_precision: null
+
+  # Segment-based learning, used to work around training of large context lengths
+  # beyond what can be supported by the current GPU vram architecture
+  #
+  # This is not 1:1 equivalent to the same training process with the required vram,
+  # as the training process is split into multiple segments, part by part,
+  # with limited learning carried over from the previous segment.
+  bptt_learning: true
+
+  # Segment range to perform backprop learning on
+  # 1 means to apply only for the last segment
+  # -1 means to apply for all segments
+  bptt_learning_range: -1
+
+data:
+  # Skip the datapath setup
+  # ignored if using the preload_datapath.py
+  skip_datapath_setup: True
+
+  # dataset_path for the prebuilt dataset, using HF `load_from_disk()`
+  #
+  # Use this if you have built your own dataset and saved it with `save_to_disk()`
+  # with source left as null. Otherwise configure this to a directory in which the
+  # dataset will be built and tokenized by the huggingface dataset process.
+  data_path: ../datapath/enwiki_100k-world-16k-packing/
+
+  # Otherwise provide the source path, which is used as the huggingface dataset path
+  # this will be used to populate the dataset_path
+  #
+  # Use one of the following
+  # - hugging face dataset
+  # - Directory path to a directory containing dataset files
+  # - Path to a single dataset file
+  # - hugging face dataset mode (ie: text,csv,etc - use data_dir to configure the path then)
+  # - null
+  #
+  # If source is disabled, all other params, except data_path, are ignored
+  source: "teven/enwiki_100k"
+  # source: text
+  # source: /home/ubuntu/RWKV-LM-LoRA/dataset-text/enwik8.txt
+
+  # Use data_dir, if you are using source=text/json/etc
+  # this should be relative to the trainer script path
+  source_data_dir: null
+
+  # After loading the dataset, split out test data used for unit-testing
+  # This process is skipped if the dataset includes a test split
+  # This process is skipped if set to zero
+  test_split: 0.10
+  test_split_shuffle: true
+
+  # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer
+  # If using a custom tokenizer, provide the tokenizer file path
+  # ---
+  tokenizer: world
+
+  # Minimum / Maximum token size of the dataset to use
+  # useful for filtering out small noisy data samples from large datasets
+  # (eg. removal of small articles of less than 512 tokens from wikipedia)
+  #
+  # This is ignored, if set to -1
+  min_token_size: 1024
+  max_token_size: 16384
+
+  # Rechunking of the text dataset, this is done only when source is set as 'text'
+  # and will merge the various sentences into larger chunks, up to the target size
+  #
+  # Defaults to 4096
+  #
+  # This is ignored, if source is not set as text
+  # This is ignored, if set to zero
+  # ---
+  # text_rechunk_size: 4096
+
+  # Apply text rechunk to the dataset, even if it is not a 'text' source
+  # This is done only after dataset filtering, and if source is not 'text'
+  # ---
+  # text_rechunk_force: true
+
+  # Custom text column to use, useful for datasets with alternative training column labels
+  # This is checked before multi column merging, default is null (disabled)
+  # eg: 'code'
+  # ---
+  # custom_text_key: 'code'
+
+  # Multi Column merging process, the default setting is used to support and merge
+  # "instruction", "input", "output" datasets. To disable, set multi_column_keys to []
+  #
+  # A minimum of 2 columns, with non-empty data, is required for the merge to occur
+  # If no match is found, this will fall back to the default prompt/completion or text column,
+  # or throw an error if the default fallback is not found
+  # ---
+  # multi_column_keys: ['instruction', 'input', 'output']
+  # multi_column_prefix: ['Instruction:\n', 'Input:\n', 'Output:\n']
+  # multi_column_train_mask: [true, false, true]
+  # multi_column_separator: '\n\n'
+
+  # If processing prompt/completion jsonl pairs, the prompt is masked by default
+  # use this flag to disable this default behaviour
+  # ---
+  # disable_prompt_completion_mask: false
+
+  # ----------------------------
+  # Dataset packing support
+  # Recommended for finetuning with mixed document sizes
+  # For foundation model "from scratch" training, rechunking is typically used instead
+  # ----------------------------
+
+  # Boolean flag to enable / disable dataset packing
+  packing_enable: True
+
+  # Used to ensure all training samples within this batch size are the same length
+  # Ideally this should align exactly with your real "batch size"
+  #
+  # Uses `8 * (3 * 4 * 5 * 6 * 7) = 20160` by default, as it aligns across
+  # a large number of batch size combinations. This helps reduce the number of
+  # misaligned batches, and thus the amount of wasted training time.
+  packing_batchsize: 128
+
+  # Chunking size to align within each batch, this ideally should be equal to
+  # the training context length used.
+  packing_chunksize: 4096
+
+  # Minimum size to pack up to, this should be a multiple of packing_chunksize
+  # defaults to -1, which equals packing_chunksize
+  packing_min_ctx_len: 16384
+
+  # Pack the data sequentially if possible, in accordance with the dataset sequence
+  # this can be used together with sort_by_length
+  packing_in_sequence: False
+
+# Path to the current checkpoint to continue training from
+# Set this to the last checkpoint after the first run
+# (if it crashes and you want to resume)
+# ckpt_path: ../checkpoint/trainer-validaiton/infctx-unit-test-baseline/epoch=0-step=20.ckpt
+ckpt_path: null
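Editor's note on the `target_batch_size` comments in the config above: the effective batch is divided across GPUs and nodes, and the remainder is realized through gradient accumulation. The sketch below only illustrates that arithmetic; the function name and the `microbatch_size` parameter are illustrative assumptions, not part of the trainer's API.

# Illustrative sketch of the target_batch_size arithmetic described in the config comments.
# This is NOT the trainer's actual implementation.
def estimate_accumulate_grad_batches(target_batch_size: int, num_gpus: int,
                                     num_nodes: int = 1, microbatch_size: int = 1) -> int:
    # Datasamples each GPU is responsible for per optimizer step
    per_gpu_samples = target_batch_size // (num_gpus * num_nodes)
    # Each forward pass already covers `microbatch_size` samples,
    # the remaining factor becomes the gradient accumulation count
    return max(1, per_gpu_samples // microbatch_size)

# Worked example from the comments: 128 samples across 4 GPUs x 2 nodes -> 16 per GPU
assert estimate_accumulate_grad_batches(128, num_gpus=4, num_nodes=2) == 16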
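Since `data_path` points at a dataset saved with HuggingFace `save_to_disk()`, the output of the `preload_datapath.py` step can be inspected directly with `load_from_disk()`. A minimal sketch, assuming the path from this config and that the tokenized samples expose an `input_ids` column:

# Quick inspection of the prebuilt, packed dataset referenced by `data_path`.
# Assumes preload_datapath.py has already been run for this config.
from datasets import load_from_disk

dataset = load_from_disk("../datapath/enwiki_100k-world-16k-packing/")
train = dataset["train"]
print(train)                           # column names and row counts
print(len(train[0]["input_ids"]))      # token length of the first packed sample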
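The `min_token_size` / `max_token_size` pair acts as a post-tokenization length filter. Roughly, it behaves like the following `datasets`-style predicate (illustrative only; the trainer's real filtering is part of its dataset build process, and the config treats -1 as "disabled"):

# Illustrative predicate for the min_token_size / max_token_size filter.
# Not the trainer's actual code path; shown only to clarify the intent.
def within_token_bounds(sample, min_token_size=1024, max_token_size=16384):
    n = len(sample["input_ids"])
    if min_token_size > 0 and n < min_token_size:
        return False
    if max_token_size > 0 and n > max_token_size:
        return False
    return True

# Usage (hypothetical variable name):
# filtered = tokenized_dataset.filter(within_token_bounds)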