origin/maze #4 (Open)
Wants to merge 4 commits into master.
2 changes: 1 addition & 1 deletion sample.py
@@ -11,7 +11,7 @@
 # -----------------------------------------------------------------------------
 init_from = 'resume' # either 'resume' (from an out_dir) or a gpt2 variant (e.g. 'gpt2-xl')
 out_dir = 'out' # ignored if init_from is not 'resume'
-start = "\n" # or "<|endoftext|>" or etc. Can also specify a file, use as: "FILE:prompt.txt"
+start = "\n \n" # or "<|endoftext|>" or etc. Can also specify a file, use as: "FILE:prompt.txt"
 num_samples = 10 # number of samples to draw
 max_new_tokens = 500 # number of tokens generated in each sample
 temperature = 0.8 # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
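Note on the change above: the new default seeds sampling with "\n \n" instead of a single newline. As the comment says, `start` can also name a prompt file via a "FILE:" prefix; below is a minimal sketch of how that convention is typically resolved (the resolving code is not part of this diff, and `encode` stands in for whatever tokenizer hook sample.py builds):

# Sketch: resolving the `start` prompt under the FILE: convention from the comment.
start = "\n \n"
if start.startswith('FILE:'):
    # Everything after the prefix is treated as a path to a plain-text prompt file
    with open(start[len('FILE:'):], 'r', encoding='utf-8') as f:
        start = f.read()
start_ids = encode(start)  # `encode` is assumed: the tokenizer sample.py sets up earlier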
2 changes: 1 addition & 1 deletion scraper.py
@@ -6,7 +6,7 @@

 def get_book_links():
     # Base URL for the Gutenberg Index API
-    base_url = 'https://gutendex.com/books/'
+    base_url = 'https://gutendex.com/'
     links = {} # Initialize a dictionary to store all links

     # Make API calls for each page number
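Note on the change above: the diff trims '/books/' off the base URL, but Gutendex serves its paginated book listing at the /books/ endpoint, so whatever builds the request must still append that path. A hedged sketch of one way the paginated fetch could look (the function body, `num_pages`, and the field names are assumptions based on the public Gutendex schema, not code from this PR):

import requests

def get_book_links(num_pages=5):
    # num_pages is a hypothetical cap for this sketch; the real scraper may paginate differently
    links = {}  # book id -> plain-text download URL
    url = 'https://gutendex.com/books/'  # Gutendex's paginated listing endpoint
    for _ in range(num_pages):
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        for book in data['results']:
            # Keep books that expose a plain-text format (key name assumed from the Gutendex schema)
            txt = book.get('formats', {}).get('text/plain; charset=us-ascii')
            if txt:
                links[book['id']] = txt
        url = data.get('next')  # Gutendex returns the next page's URL, or None on the last page
        if url is None:
            break
    return links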
4 changes: 2 additions & 2 deletions train.py
@@ -42,7 +42,7 @@
 # wandb logging
 wandb_log = False # disabled by default
 wandb_project = 'owt'
-wandb_run_name = 'gpt2-2005' # 'run' + str(time.time())
+wandb_run_name = 'gpt-2005' # 'run' + str(time.time())
 # data
 dataset = 'mazew'
 gradient_accumulation_steps = 5 * 8 # used to simulate larger batch sizes
@@ -69,7 +69,7 @@
 # DDP settings
 backend = 'nccl' # 'nccl', 'gloo', etc.
 # system
-device = 'cuda' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1' etc., or try 'mps' on macbooks
+device = 'cpu' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1' etc., or try 'mps' on macbooks
 dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32', 'bfloat16', or 'float16', the latter will auto implement a GradScaler
 compile = True # use PyTorch 2.0 to compile the model to be faster
 # -----------------------------------------------------------------------------
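Note on the second hunk: switching `device` to 'cpu' interacts with the `dtype` line just below it, since `dtype` still probes CUDA, and the comment's "auto implement a GradScaler" refers to the float16 path. A sketch of how these two config strings are conventionally wired into an autocast context and a GradScaler in nanoGPT-style training loops (the wiring below is assumed; only the two config lines come from this diff):

import torch
from contextlib import nullcontext

device = 'cpu'  # the new default from this PR
dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16'

device_type = 'cuda' if 'cuda' in device else 'cpu'
ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
# Autocast only applies on CUDA here; on CPU fall back to a no-op context manager
ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)
# The scaler only activates when training in float16, where loss scaling is needed
scaler = torch.cuda.amp.GradScaler(enabled=(dtype == 'float16'))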