From f21e3d669e8354b28bef7b8ab3b371e0e10a3538 Mon Sep 17 00:00:00 2001
From: William Warriner <6930772+wwarriner@users.noreply.github.com>
Date: Thu, 21 Nov 2024 15:33:12 -0600
Subject: [PATCH] added comments

---
 scripts/linkchecker.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/scripts/linkchecker.py b/scripts/linkchecker.py
index fbad7e1f..49aa63ab 100644
--- a/scripts/linkchecker.py
+++ b/scripts/linkchecker.py
@@ -4,6 +4,12 @@
 
 import pandas as pd
 
+"""
+How to use:
+
+python ./scripts/linkchecker.py
+"""
+
 # Cleans up output of linkchecker
 
 OUTPUT = PurePath("out")
@@ -108,13 +114,13 @@ def ignore_rows_containing(
 # special ignore rules
 df = ignore_rows_containing(
     df, URL_IN_MARKDOWN, "https://doi.org", if_result_code="200"
-)
+)  # doi.org always redirects, that's its purpose, so we ignore
 df = ignore_rows_containing(
     df, URL_IN_MARKDOWN, "https://anaconda.org", if_result_code="403"
-)
+)  # if anaconda.org goes down we'll surely hear about it
 df = ignore_rows_containing(
     df, URL_AFTER_REDIRECTION, "https://padlock.idm.uab.edu", if_result_code="423"
-)
+)  # padlock urls mean we would need to check manually anyway
 
 # organize
 df = df.sort_values(by=[RESULT, URL_IN_MARKDOWN, MARKDOWN_FILE, LINE, COLUMN])