From f21e3d669e8354b28bef7b8ab3b371e0e10a3538 Mon Sep 17 00:00:00 2001
From: William Warriner <6930772+wwarriner@users.noreply.github.com>
Date: Thu, 21 Nov 2024 15:33:12 -0600
Subject: [PATCH] added comments

---
 scripts/linkchecker.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/scripts/linkchecker.py b/scripts/linkchecker.py
index fbad7e1f..49aa63ab 100644
--- a/scripts/linkchecker.py
+++ b/scripts/linkchecker.py
@@ -4,6 +4,12 @@
 
 import pandas as pd
 
+"""
+How to use:
+
+python ./scripts/linkchecker.py
+"""
+
 # Cleans up output of linkchecker
 
 OUTPUT = PurePath("out")
@@ -108,13 +114,13 @@ def ignore_rows_containing(
 # special ignore rules
 df = ignore_rows_containing(
     df, URL_IN_MARKDOWN, "https://doi.org", if_result_code="200"
-)
+)  # doi.org always redirects, that's its purpose, so we ignore
 df = ignore_rows_containing(
     df, URL_IN_MARKDOWN, "https://anaconda.org", if_result_code="403"
-)
+)  # if anaconda.org goes down we'll surely hear about it
 df = ignore_rows_containing(
     df, URL_AFTER_REDIRECTION, "https://padlock.idm.uab.edu", if_result_code="423"
-)
+)  # padlock urls mean we would need to check manually anyway
 
 # organize
 df = df.sort_values(by=[RESULT, URL_IN_MARKDOWN, MARKDOWN_FILE, LINE, COLUMN])