From 353dac958d4e3cfba5be5ec33ab86078d93cead3 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Wed, 6 Dec 2023 14:53:13 +0800
Subject: [PATCH 1/5] Use the `coverage` package directly

Omit `taxondata.py` from coverage measurement to speed up the tests and HTML report generation.
---
 .github/workflows/test.yml | 14 +++++++++-----
 Makefile                   | 16 +++++++++-------
 README.md                  | 22 ++--------------------
 pyproject.toml             |  8 +++++---
 4 files changed, 25 insertions(+), 35 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 041f076e..ee90bf08 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,19 +30,23 @@ jobs:
       - run: |
           python -m pip install -U pip
           python -m pip install --use-pep517 '.[dev]'
-      - run: make test
+      - run: make test_coverage
         env:
           PYTHONWARNINGS: default
-      - name: Remove huge file taxondata_py.html
-        run: rm -f htmlcov/*_taxondata_py.html
+
+  coverage_report:
+    if: github.ref_name == 'master'
+    needs: test
+    runs-on: ubuntu-latest
+    steps:
+      - run: make coverage_report
       - uses: actions/upload-pages-artifact@v2
-        if: github.ref_name == 'master' && matrix.python-version == '3.12'
         with:
           path: htmlcov
 
   deploy:
     if: github.ref_name == 'master'
-    needs: test
+    needs: coverage_report
     permissions:
       pages: write
       id-token: write
diff --git a/Makefile b/Makefile
index be49138b..67a9eb61 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,13 @@
 # Run "make test" to run tests (with coverage analysis left in ./htmlcov)
 # Run "make clean" to remove automatically generated files
-
 test:
-	rm -rf .coverage htmlcov
-	python -m nose2 --output-buffer --pretty-assert --with-coverage --coverage-report=html
-quicktest:
-	python -m nose2 --output-buffer --pretty-assert
+	python -m unittest discover -s tests
+test_coverage:
+	python -m coverage erase
+	python -m coverage run -m unittest discover -s tests
+coverage_report:
+	python -m coverage combine
+	python -m coverage html
 clean:
-	rm -rf __pycache__
-	rm -rf .coverage* htmlcov*
+	python -m coverage erase
+	rm -rf __pycache__ htmlcov*
diff --git a/README.md b/README.md
index b4aff931..bf3aec7c 100644
--- a/README.md
+++ b/README.md
@@ -322,13 +322,11 @@ python -m pip install -U pip
 python -m pip install --use-pep517 .
 ```
 
-This software requires Python 3.
-
 ### Running tests
 
 This package includes tests written using the `unittest` framework.
-They can be run using, for example, `nose2`, which can be installed
-using `python -m pip install --use-pep517 -e ".[dev]"`.
+The test dependencies can be installed with command
+`python -m pip install --use-pep517 -e ".[dev]"`.
 
 To run the tests, use the following command in the top-level directory:
 
@@ -358,22 +356,6 @@ updated regularly with the latest Wiktionary dump.  Using the
 pre-extracted data may be the easiest option unless you have special
 needs or want to modify the code.
 
-### Installing and running tests on Windows with VS Code
-
-Tested with Python 3.9.4.
-
-- Create [a Python virtual environment](https://code.visualstudio.com/docs/python/environments#_creating-environments)
-(venv) in the VS Code workspace with the cloned repo. It should automatically install the package.
-
-- Open a new terminal. It should be PowerShell. You may need to [fix terminal permissions](https://stackoverflow.com/questions/56199111/visual-studio-code-cmd-error-cannot-be-loaded-because-running-scripts-is-disabl/67420296#67420296)
-in order for it to pick up the virtual environment correclty.
-
-- In the terminal run this command:
-
-```
-py -m nose2 -B
-```
-
 ## Using the command-line tool
 
 The ``wiktwords`` script is the easiest way to extract data from
diff --git a/pyproject.toml b/pyproject.toml
index d596dbfc..88d44ccb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,11 +40,10 @@ dependencies = [
 [project.optional-dependencies]
 dev = [
     "black",
+    "coverage[toml]",
     "jsonschema",
     "mypy",
-    "nose2[coverage_plugin]",
     "ruff",
-    "tomli; python_version <= '3.10'",  # for coverage parsing TOML file
 ]
 
 [project.scripts]
@@ -65,7 +64,10 @@ wiktextract = [
 [tool.coverage.run]
 branch = true
 concurrency = ["multiprocessing"]
-omit = ["tests/*"]
+omit = [
+    "tests/*",
+    "src/wiktextract/taxondata.py",  # huge file
+]
 
 [tool.black]
 line-length = 80

From ea62b0ad0854fd6ba72607d32a7e26cd0beb82d2 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Wed, 6 Dec 2023 16:44:09 +0800
Subject: [PATCH 2/5] Add JSON schema files to GitHub Pages

---
 .github/workflows/test.yml     |  18 +-
 .gitignore                     |   3 +
 Makefile                       |  12 +-
 json_schema/de.json            | 932 ---------------------------------
 json_schema/es.json            | 876 -------------------------------
 json_schema/generate_schema.py |   7 +-
 json_schema/ru.json            | 199 -------
 pyproject.toml                 |   3 +
 tools/github_pages.py          |  43 ++
 9 files changed, 69 insertions(+), 2024 deletions(-)
 delete mode 100644 json_schema/de.json
 delete mode 100644 json_schema/es.json
 delete mode 100644 json_schema/ru.json
 create mode 100644 tools/github_pages.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ee90bf08..e463410f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -29,24 +29,20 @@ jobs:
           cache: 'pip'
       - run: |
           python -m pip install -U pip
-          python -m pip install --use-pep517 '.[dev]'
+          python -m pip install --use-pep517 -e '.[dev]'
       - run: make test_coverage
         env:
           PYTHONWARNINGS: default
-
-  coverage_report:
-    if: github.ref_name == 'master'
-    needs: test
-    runs-on: ubuntu-latest
-    steps:
-      - run: make coverage_report
+      - run: |
+          make coverage_report
+          make github_pages
+        if: github.ref_name == 'master' && matrix.python-version == '3.12'
       - uses: actions/upload-pages-artifact@v2
-        with:
-          path: htmlcov
+        if: github.ref_name == 'master' && matrix.python-version == '3.12'
 
   deploy:
     if: github.ref_name == 'master'
-    needs: coverage_report
+    needs: test
     permissions:
       pages: write
       id-token: write
diff --git a/.gitignore b/.gitignore
index a9b02002..1271ff87 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,6 @@ usertools/data/
 bac-wikt-*
 pagesbac/
 wikt-db*
+
+# GitHub Pages
+_site
diff --git a/Makefile b/Makefile
index 67a9eb61..eff16dda 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,17 @@
-# Run "make test" to run tests (with coverage analysis left in ./htmlcov)
+# Run "make test" to run tests
 # Run "make clean" to remove automatically generated files
 test:
-	python -m unittest discover -s tests
+	python -m unittest discover -b -s tests
 test_coverage:
 	python -m coverage erase
-	python -m coverage run -m unittest discover -s tests
+	python -m coverage run -m unittest discover -b -s tests
 coverage_report:
 	python -m coverage combine
 	python -m coverage html
+github_pages:
+	python json_schema/generate_schema.py
+	cp json_schema/*.json _site
+	python tools/github_pages.py
 clean:
 	python -m coverage erase
-	rm -rf __pycache__ htmlcov*
+	rm -rf __pycache__ _site
diff --git a/json_schema/de.json b/json_schema/de.json
deleted file mode 100644
index a5d645d4..00000000
--- a/json_schema/de.json
+++ /dev/null
@@ -1,932 +0,0 @@
-{
-  "$defs": {
-    "Example": {
-      "additionalProperties": false,
-      "properties": {
-        "ref": {
-          "anyOf": [
-            {
-              "$ref": "#/$defs/Reference"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": ""
-        },
-        "text": {
-          "default": null,
-          "description": "Example usage sentence",
-          "title": "Text",
-          "type": "string"
-        }
-      },
-      "title": "Example",
-      "type": "object"
-    },
-    "Reference": {
-      "additionalProperties": false,
-      "properties": {
-        "accessdate": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Date of access of online reference",
-          "title": "Accessdate"
-        },
-        "author": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Author's name",
-          "title": "Author"
-        },
-        "collection": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Name of collection that reference was published in",
-          "title": "Collection"
-        },
-        "comment": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Comment on the reference",
-          "title": "Comment"
-        },
-        "date": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Date of publication",
-          "title": "Date"
-        },
-        "day": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Day of publication",
-          "title": "Day"
-        },
-        "edition": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Edition number",
-          "title": "Edition"
-        },
-        "editor": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Editor",
-          "title": "Editor"
-        },
-        "isbn": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "ISBN number",
-          "title": "Isbn"
-        },
-        "month": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Month of publication",
-          "title": "Month"
-        },
-        "number": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Issue number",
-          "title": "Number"
-        },
-        "pages": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Page numbers",
-          "title": "Pages"
-        },
-        "place": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Place of publication",
-          "title": "Place"
-        },
-        "publisher": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Published by",
-          "title": "Publisher"
-        },
-        "raw_ref": {
-          "default": null,
-          "description": "Raw reference string",
-          "title": "Raw Ref",
-          "type": "string"
-        },
-        "title": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Title of the reference",
-          "title": "Title"
-        },
-        "title_complement": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Complement to the title",
-          "title": "Title Complement"
-        },
-        "translator": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Translator",
-          "title": "Translator"
-        },
-        "url": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "A web link. Not necessarily well-formated.",
-          "title": "Url"
-        },
-        "volume": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Volume number",
-          "title": "Volume"
-        },
-        "year": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Year of publication",
-          "title": "Year"
-        }
-      },
-      "title": "Reference",
-      "type": "object"
-    },
-    "Sense": {
-      "additionalProperties": false,
-      "properties": {
-        "antonyms": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Antonyms"
-        },
-        "categories": {
-          "default": [],
-          "description": "list of sense-disambiguated category names extracted from (a subset) of the Category links on the page",
-          "items": {
-            "type": "string"
-          },
-          "title": "Categories",
-          "type": "array"
-        },
-        "coordinate_terms": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Coordinate Terms"
-        },
-        "derived": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Derived"
-        },
-        "examples": {
-          "default": [],
-          "description": "List of examples",
-          "items": {
-            "$ref": "#/$defs/Example"
-          },
-          "title": "Examples",
-          "type": "array"
-        },
-        "expressions": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Expressions"
-        },
-        "glosses": {
-          "default": [],
-          "description": "list of gloss strings for the word sense (usually only one). This has been cleaned, and should be straightforward text with no tagging.",
-          "items": {
-            "type": "string"
-          },
-          "title": "Glosses",
-          "type": "array"
-        },
-        "holonyms": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Holonyms"
-        },
-        "hypernyms": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Hypernyms"
-        },
-        "hyponyms": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Hyponyms"
-        },
-        "proverbs": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Proverbs"
-        },
-        "raw_glosses": {
-          "default": [],
-          "description": "list of uncleaned raw glosses for the word sense (usually only one).",
-          "items": {
-            "type": "string"
-          },
-          "title": "Raw Glosses",
-          "type": "array"
-        },
-        "senseid": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Sense number used in Wiktionary",
-          "title": "Senseid"
-        },
-        "synonyms": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Synonyms"
-        },
-        "tags": {
-          "default": [],
-          "description": "list of gloss strings for the word sense (usually only one). This has been cleaned, and should be straightforward text with no tagging.",
-          "items": {
-            "type": "string"
-          },
-          "title": "Tags",
-          "type": "array"
-        },
-        "translations": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Translation"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Translations"
-        }
-      },
-      "title": "Sense",
-      "type": "object"
-    },
-    "Sound": {
-      "additionalProperties": false,
-      "properties": {
-        "audio": {
-          "default": [],
-          "description": "Audio file name",
-          "items": {
-            "type": "string"
-          },
-          "title": "Audio",
-          "type": "array"
-        },
-        "flac_url": {
-          "default": [],
-          "items": {
-            "type": "string"
-          },
-          "title": "Flac Url",
-          "type": "array"
-        },
-        "ipa": {
-          "default": [],
-          "description": "International Phonetic Alphabet",
-          "items": {
-            "type": "string"
-          },
-          "title": "Ipa",
-          "type": "array"
-        },
-        "lang_code": {
-          "default": [],
-          "description": "Wiktionary language code",
-          "items": {
-            "type": "string"
-          },
-          "title": "Lang Code",
-          "type": "array"
-        },
-        "lang_name": {
-          "default": [],
-          "description": "Localized language name",
-          "items": {
-            "type": "string"
-          },
-          "title": "Lang Name",
-          "type": "array"
-        },
-        "mp3_url": {
-          "default": [],
-          "items": {
-            "type": "string"
-          },
-          "title": "Mp3 Url",
-          "type": "array"
-        },
-        "oga_url": {
-          "default": [],
-          "items": {
-            "type": "string"
-          },
-          "title": "Oga Url",
-          "type": "array"
-        },
-        "ogg_url": {
-          "default": [],
-          "items": {
-            "type": "string"
-          },
-          "title": "Ogg Url",
-          "type": "array"
-        },
-        "tags": {
-          "default": [],
-          "description": "Specifying the variant of the pronunciation",
-          "items": {
-            "type": "string"
-          },
-          "title": "Tags",
-          "type": "array"
-        },
-        "wav_url": {
-          "default": [],
-          "items": {
-            "type": "string"
-          },
-          "title": "Wav Url",
-          "type": "array"
-        }
-      },
-      "title": "Sound",
-      "type": "object"
-    },
-    "Translation": {
-      "additionalProperties": false,
-      "properties": {
-        "lang_code": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Wiktionary language code of the translation term",
-          "title": "Lang Code"
-        },
-        "lang_name": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Localized language name",
-          "title": "Lang Name"
-        },
-        "notes": {
-          "default": [],
-          "description": "A list of notes",
-          "items": {
-            "type": "string"
-          },
-          "title": "Notes",
-          "type": "array"
-        },
-        "roman": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Transliteration in roman characters",
-          "title": "Roman"
-        },
-        "sense": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "A gloss of the sense being translated",
-          "title": "Sense"
-        },
-        "tags": {
-          "default": [],
-          "description": "Tags specifying the translated term, usually gender information",
-          "items": {
-            "type": "string"
-          },
-          "title": "Tags",
-          "type": "array"
-        },
-        "uncertain": {
-          "anyOf": [
-            {
-              "type": "boolean"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": false,
-          "description": "Translation marked as uncertain",
-          "title": "Uncertain"
-        },
-        "word": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Translation term",
-          "title": "Word"
-        }
-      },
-      "title": "Translation",
-      "type": "object"
-    }
-  },
-  "$id": "https://kaikki.org/de.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "description": "WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.",
-  "properties": {
-    "antonyms": {
-      "anyOf": [
-        {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Antonyms"
-    },
-    "coordinate_terms": {
-      "anyOf": [
-        {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Coordinate Terms"
-    },
-    "derived": {
-      "anyOf": [
-        {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Derived"
-    },
-    "expressions": {
-      "anyOf": [
-        {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Expressions"
-    },
-    "holonyms": {
-      "anyOf": [
-        {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Holonyms"
-    },
-    "hypernyms": {
-      "anyOf": [
-        {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Hypernyms"
-    },
-    "hyponyms": {
-      "anyOf": [
-        {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Hyponyms"
-    },
-    "lang_code": {
-      "description": "Wiktionary language code",
-      "examples": [
-        "es"
-      ],
-      "title": "Lang Code",
-      "type": "string"
-    },
-    "lang_name": {
-      "description": "Localized language name of the word",
-      "examples": [
-        "español"
-      ],
-      "title": "Lang Name",
-      "type": "string"
-    },
-    "pos": {
-      "default": null,
-      "description": "Part of speech type",
-      "title": "Pos",
-      "type": "string"
-    },
-    "proverbs": {
-      "anyOf": [
-        {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Proverbs"
-    },
-    "senses": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Sense"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Senses"
-    },
-    "sounds": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Sound"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Sounds"
-    },
-    "synonyms": {
-      "anyOf": [
-        {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Synonyms"
-    },
-    "translations": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Translation"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Translations"
-    },
-    "word": {
-      "description": "word string",
-      "title": "Word",
-      "type": "string"
-    }
-  },
-  "required": [
-    "word",
-    "lang_code",
-    "lang_name"
-  ],
-  "title": "German Wiktionary",
-  "type": "object"
-}
\ No newline at end of file
diff --git a/json_schema/es.json b/json_schema/es.json
deleted file mode 100644
index 7dc1d6f2..00000000
--- a/json_schema/es.json
+++ /dev/null
@@ -1,876 +0,0 @@
-{
-  "$defs": {
-    "Example": {
-      "additionalProperties": false,
-      "properties": {
-        "ref": {
-          "anyOf": [
-            {
-              "$ref": "#/$defs/Reference"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": ""
-        },
-        "text": {
-          "description": "Example usage sentence",
-          "title": "Text",
-          "type": "string"
-        },
-        "translation": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Spanish translation of the example sentence",
-          "title": "Translation"
-        }
-      },
-      "required": [
-        "text"
-      ],
-      "title": "Example",
-      "type": "object"
-    },
-    "Linkage": {
-      "additionalProperties": false,
-      "properties": {
-        "alternative_spelling": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Alternative spelling of the word",
-          "title": "Alternative Spelling"
-        },
-        "note": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Note"
-        },
-        "word": {
-          "title": "Word",
-          "type": "string"
-        }
-      },
-      "required": [
-        "word"
-      ],
-      "title": "Linkage",
-      "type": "object"
-    },
-    "Reference": {
-      "additionalProperties": false,
-      "properties": {
-        "chapter": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Chapter name",
-          "title": "Chapter"
-        },
-        "date": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Date of publication",
-          "title": "Date"
-        },
-        "editor": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Editor",
-          "title": "Editor"
-        },
-        "first_name": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Author's first name",
-          "title": "First Name"
-        },
-        "journal": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Name of journal",
-          "title": "Journal"
-        },
-        "last_name": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Author's last name",
-          "title": "Last Name"
-        },
-        "pages": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Page numbers",
-          "title": "Pages"
-        },
-        "place": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Place of publication",
-          "title": "Place"
-        },
-        "title": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Title of the reference",
-          "title": "Title"
-        },
-        "url": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "A web link",
-          "title": "Url"
-        },
-        "year": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Year of publication",
-          "title": "Year"
-        }
-      },
-      "title": "Reference",
-      "type": "object"
-    },
-    "Sense": {
-      "additionalProperties": false,
-      "properties": {
-        "antonyms": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Linkage"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Antonyms"
-        },
-        "categories": {
-          "default": [],
-          "description": "list of sense-disambiguated category names extracted from (a subset) of the Category links on the page",
-          "items": {
-            "type": "string"
-          },
-          "title": "Categories",
-          "type": "array"
-        },
-        "compounds": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Linkage"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Compounds"
-        },
-        "derived": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Linkage"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Derived"
-        },
-        "examples": {
-          "default": [],
-          "description": "List of examples",
-          "items": {
-            "$ref": "#/$defs/Example"
-          },
-          "title": "Examples",
-          "type": "array"
-        },
-        "glosses": {
-          "description": "list of gloss strings for the word sense (usually only one). This has been cleaned, and should be straightforward text with no tagging.",
-          "items": {
-            "type": "string"
-          },
-          "title": "Glosses",
-          "type": "array"
-        },
-        "hypernyms": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Linkage"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Hypernyms"
-        },
-        "hyponyms": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Linkage"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Hyponyms"
-        },
-        "idioms": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Linkage"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Idioms"
-        },
-        "meronyms": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Linkage"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Meronyms"
-        },
-        "related": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Linkage"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Related"
-        },
-        "senseid": {
-          "anyOf": [
-            {
-              "type": "integer"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Sense number used in Wiktionary",
-          "title": "Senseid"
-        },
-        "synonyms": {
-          "anyOf": [
-            {
-              "items": {
-                "$ref": "#/$defs/Linkage"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "title": "Synonyms"
-        },
-        "tags": {
-          "default": [],
-          "description": "list of gloss strings for the word sense (usually only one). This has been cleaned, and should be straightforward text with no tagging.",
-          "items": {
-            "type": "string"
-          },
-          "title": "Tags",
-          "type": "array"
-        }
-      },
-      "required": [
-        "glosses"
-      ],
-      "title": "Sense",
-      "type": "object"
-    },
-    "Sound": {
-      "additionalProperties": false,
-      "properties": {
-        "audio": {
-          "default": [],
-          "description": "Audio file name",
-          "items": {
-            "type": "string"
-          },
-          "title": "Audio",
-          "type": "array"
-        },
-        "flac_url": {
-          "default": [],
-          "items": {
-            "type": "string"
-          },
-          "title": "Flac Url",
-          "type": "array"
-        },
-        "ipa": {
-          "default": [],
-          "description": "International Phonetic Alphabet",
-          "items": {
-            "type": "string"
-          },
-          "title": "Ipa",
-          "type": "array"
-        },
-        "mp3_url": {
-          "default": [],
-          "items": {
-            "type": "string"
-          },
-          "title": "Mp3 Url",
-          "type": "array"
-        },
-        "ogg_url": {
-          "default": [],
-          "items": {
-            "type": "string"
-          },
-          "title": "Ogg Url",
-          "type": "array"
-        },
-        "phonetic_transcription": {
-          "default": [],
-          "description": "Phonetic transcription, less exact than IPA.",
-          "items": {
-            "type": "string"
-          },
-          "title": "Phonetic Transcription",
-          "type": "array"
-        },
-        "roman": {
-          "default": [],
-          "description": "Translitaration to Roman characters",
-          "items": {
-            "type": "string"
-          },
-          "title": "Roman",
-          "type": "array"
-        },
-        "syllabic": {
-          "default": [],
-          "description": "Syllabic transcription",
-          "items": {
-            "type": "string"
-          },
-          "title": "Syllabic",
-          "type": "array"
-        },
-        "tag": {
-          "default": [],
-          "description": "Specifying the variant of the pronunciation",
-          "items": {
-            "type": "string"
-          },
-          "title": "Tag",
-          "type": "array"
-        },
-        "wav_url": {
-          "default": [],
-          "items": {
-            "type": "string"
-          },
-          "title": "Wav Url",
-          "type": "array"
-        }
-      },
-      "title": "Sound",
-      "type": "object"
-    },
-    "Spelling": {
-      "additionalProperties": false,
-      "properties": {
-        "alternative": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Alternative spelling with same pronunciation",
-          "title": "Alternative"
-        },
-        "note": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Note regarding alternative spelling",
-          "title": "Note"
-        },
-        "same_pronunciation": {
-          "anyOf": [
-            {
-              "type": "boolean"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Whether the alternative spelling has the same pronunciation as the default spelling",
-          "title": "Same Pronunciation"
-        }
-      },
-      "title": "Spelling",
-      "type": "object"
-    },
-    "Translation": {
-      "additionalProperties": false,
-      "properties": {
-        "lang_code": {
-          "description": "Wiktionary language code of the translation term",
-          "title": "Lang Code",
-          "type": "string"
-        },
-        "notes": {
-          "default": [],
-          "description": "A list of notes",
-          "items": {
-            "type": "string"
-          },
-          "title": "Notes",
-          "type": "array"
-        },
-        "roman": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Transliteration in roman characters",
-          "title": "Roman"
-        },
-        "senseids": {
-          "default": [],
-          "description": "List of senseids where this translation applies",
-          "items": {
-            "type": "string"
-          },
-          "title": "Senseids",
-          "type": "array"
-        },
-        "tags": {
-          "default": [],
-          "description": "Tags specifying the translated term, usually gender information",
-          "items": {
-            "type": "string"
-          },
-          "title": "Tags",
-          "type": "array"
-        },
-        "word": {
-          "description": "Translation term",
-          "title": "Word",
-          "type": "string"
-        }
-      },
-      "required": [
-        "word",
-        "lang_code"
-      ],
-      "title": "Translation",
-      "type": "object"
-    }
-  },
-  "$id": "https://kaikki.org/es.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "description": "WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.",
-  "properties": {
-    "antonyms": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Linkage"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Antonyms"
-    },
-    "categories": {
-      "default": [],
-      "description": "list of non-disambiguated categories for the word",
-      "items": {
-        "type": "string"
-      },
-      "title": "Categories",
-      "type": "array"
-    },
-    "compounds": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Linkage"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Compounds"
-    },
-    "derived": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Linkage"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Derived"
-    },
-    "hypernyms": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Linkage"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Hypernyms"
-    },
-    "hyponyms": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Linkage"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Hyponyms"
-    },
-    "idioms": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Linkage"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Idioms"
-    },
-    "lang_code": {
-      "description": "Wiktionary language code",
-      "examples": [
-        "es"
-      ],
-      "title": "Lang Code",
-      "type": "string"
-    },
-    "lang_name": {
-      "description": "Localized language name of the word",
-      "examples": [
-        "español"
-      ],
-      "title": "Lang Name",
-      "type": "string"
-    },
-    "meronyms": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Linkage"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Meronyms"
-    },
-    "pos": {
-      "default": null,
-      "description": "Part of speech type",
-      "title": "Pos",
-      "type": "string"
-    },
-    "pos_title": {
-      "default": null,
-      "description": "Original POS title",
-      "title": "Pos Title",
-      "type": "string"
-    },
-    "related": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Linkage"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Related"
-    },
-    "senses": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Sense"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Senses"
-    },
-    "sounds": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Sound"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Sounds"
-    },
-    "spellings": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Spelling"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Spellings"
-    },
-    "synonyms": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Linkage"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Synonyms"
-    },
-    "translations": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Translation"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Translations"
-    },
-    "word": {
-      "description": "word string",
-      "title": "Word",
-      "type": "string"
-    }
-  },
-  "required": [
-    "word",
-    "lang_code",
-    "lang_name"
-  ],
-  "title": "Spanish Wiktionary",
-  "type": "object"
-}
\ No newline at end of file
diff --git a/json_schema/generate_schema.py b/json_schema/generate_schema.py
index edfaa471..e224406f 100644
--- a/json_schema/generate_schema.py
+++ b/json_schema/generate_schema.py
@@ -1,5 +1,6 @@
 import importlib
 import json
+from pathlib import Path
 from importlib.resources import files
 
 
@@ -11,6 +12,8 @@ def main() -> None:
     """
 
     extractor_folder = files("wiktextract") / "extractor"
+    output_path = Path("_site")
+    output_path.mkdir(exist_ok=True)
     for extractor_folder in filter(
         lambda p: p.is_dir(), (files("wiktextract") / "extractor").iterdir()
     ):
@@ -24,8 +27,8 @@ def main() -> None:
             model_schema[
                 "$schema"
             ] = "https://json-schema.org/draft/2020-12/schema"
-            with open(
-                f"json_schema/{lang_code}.json", "w", encoding="utf-8"
+            with (output_path / f"{lang_code}.json").open(
+                "w", encoding="utf-8"
             ) as f:
                 json.dump(
                     model_schema,
diff --git a/json_schema/ru.json b/json_schema/ru.json
deleted file mode 100644
index 8955fbee..00000000
--- a/json_schema/ru.json
+++ /dev/null
@@ -1,199 +0,0 @@
-{
-  "$defs": {
-    "Sound": {
-      "additionalProperties": false,
-      "properties": {
-        "audio": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "Audio file name",
-          "title": "Audio"
-        },
-        "flac_url": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Flac Url"
-        },
-        "homophones": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "description": "Words with same pronunciation",
-          "title": "Homophones"
-        },
-        "ipa": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "description": "International Phonetic Alphabet",
-          "title": "Ipa"
-        },
-        "mp3_url": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Mp3 Url"
-        },
-        "oga_url": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Oga Url"
-        },
-        "ogg_url": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Ogg Url"
-        },
-        "tags": {
-          "anyOf": [
-            {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": [],
-          "description": "Specifying the variant of the pronunciation",
-          "title": "Tags"
-        },
-        "wav_url": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Wav Url"
-        }
-      },
-      "title": "Sound",
-      "type": "object"
-    }
-  },
-  "$id": "https://kaikki.org/ru.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "description": "WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.",
-  "properties": {
-    "categories": {
-      "default": [],
-      "description": "list of non-disambiguated categories for the word",
-      "items": {
-        "type": "string"
-      },
-      "title": "Categories",
-      "type": "array"
-    },
-    "lang_code": {
-      "description": "Wiktionary language code",
-      "examples": [
-        "ru"
-      ],
-      "title": "Lang Code",
-      "type": "string"
-    },
-    "lang_name": {
-      "description": "Localized language name of the word",
-      "examples": [
-        "Русский"
-      ],
-      "title": "Lang Name",
-      "type": "string"
-    },
-    "pos": {
-      "default": null,
-      "description": "Part of speech type",
-      "title": "Pos",
-      "type": "string"
-    },
-    "pos_title": {
-      "default": null,
-      "description": "Original POS title",
-      "title": "Pos Title",
-      "type": "string"
-    },
-    "sounds": {
-      "anyOf": [
-        {
-          "items": {
-            "$ref": "#/$defs/Sound"
-          },
-          "type": "array"
-        },
-        {
-          "type": "null"
-        }
-      ],
-      "default": [],
-      "title": "Sounds"
-    },
-    "word": {
-      "description": "word string",
-      "title": "Word",
-      "type": "string"
-    }
-  },
-  "required": [
-    "word",
-    "lang_code",
-    "lang_name"
-  ],
-  "title": "Russian Wiktionary",
-  "type": "object"
-}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 88d44ccb..26ab1dcd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,6 +69,9 @@ omit = [
     "src/wiktextract/taxondata.py",  # huge file
 ]
 
+[tool.coverage.html]
+directory = "_site/htmlcov"
+
 [tool.black]
 line-length = 80
 
diff --git a/tools/github_pages.py b/tools/github_pages.py
new file mode 100644
index 00000000..7fb1cfeb
--- /dev/null
+++ b/tools/github_pages.py
@@ -0,0 +1,43 @@
+from pathlib import Path
+
+
+def main():
+    """
+    Generate a simple HTML page to list files in the `_site` folder.
+    """
+    html = """
+    <!DOCTYPE HTML>
+    <html lang="en-US">
+        <head>
+            <meta charset="utf-8" />
+            <meta name="viewport" content="width=device-width" />
+            <title>wiktextract</title>
+        </head>
+        <body>
+            <h1>wiktextract</h1>
+            <h2><a href="htmlcov/index.html">Coverage report</a></h2>
+            <h2>JSON schema</h2>
+            <ul>
+    """
+
+    json_schemas = [
+        path.name
+        for path in Path("_site").iterdir()
+        if path.is_file() and path.suffix == ".json"
+    ]
+    json_schemas.sort()
+    for schema in json_schemas:
+        html += f"<li><a href='{schema}'>{schema}</a></li>"
+
+    html += """
+        </ul>
+        </body>
+    </html>
+    """
+
+    with open("_site/index.html", "w", encoding="utf-8") as f:
+        f.write(html)
+
+
+if __name__ == "__main__":
+    main()

From 48865502a0e3802f60c442b9950e4cb6fb07ae9c Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Thu, 7 Dec 2023 09:45:39 +0800
Subject: [PATCH 3/5] Move `generate_schema.py` to `tools` folder

---
 Makefile                                  |  2 +-
 json_schema/validate.py                   | 36 -----------------------
 pyproject.toml                            |  1 -
 {json_schema => tools}/generate_schema.py |  0
 4 files changed, 1 insertion(+), 38 deletions(-)
 delete mode 100644 json_schema/validate.py
 rename {json_schema => tools}/generate_schema.py (100%)

diff --git a/Makefile b/Makefile
index eff16dda..5d52f200 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ coverage_report:
 	python -m coverage combine
 	python -m coverage html
 github_pages:
-	python json_schema/generate_schema.py
+	python tools/generate_schema.py
 	cp json_schema/*.json _site
 	python tools/github_pages.py
 clean:
diff --git a/json_schema/validate.py b/json_schema/validate.py
deleted file mode 100644
index 1fb53e46..00000000
--- a/json_schema/validate.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import argparse
-import json
-from concurrent.futures import ProcessPoolExecutor
-from functools import partial
-from pathlib import Path
-
-
-def worker(line, schema={}):
-    from jsonschema import validate
-
-    validate(instance=json.loads(line), schema=schema)
-
-
-def main():
-    """
-    Validate extracted JSONL file with JSON schema.
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument("jsonl_path", type=Path)
-    parser.add_argument("schema_path", type=Path)
-    args = parser.parse_args()
-
-    with (
-        args.jsonl_path.open(encoding="utf-8") as jsonl_f,
-        args.schema_path.open(encoding="utf-8") as schema_f,
-        ProcessPoolExecutor() as executor,
-    ):
-        schema = json.load(schema_f)
-        for _ in executor.map(
-            partial(worker, schema=schema), jsonl_f, chunksize=1000
-        ):
-            pass
-
-
-if __name__ == "__main__":
-    main()
diff --git a/pyproject.toml b/pyproject.toml
index 26ab1dcd..9b0bbe93 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,7 +41,6 @@ dependencies = [
 dev = [
     "black",
     "coverage[toml]",
-    "jsonschema",
     "mypy",
     "ruff",
 ]
diff --git a/json_schema/generate_schema.py b/tools/generate_schema.py
similarity index 100%
rename from json_schema/generate_schema.py
rename to tools/generate_schema.py

From afcf31c927b714e5bedc9a7ba271ff8781802f5f Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Thu, 7 Dec 2023 10:18:46 +0800
Subject: [PATCH 4/5] Use schema title in file link

---
 tools/github_pages.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/tools/github_pages.py b/tools/github_pages.py
index 7fb1cfeb..967eb38a 100644
--- a/tools/github_pages.py
+++ b/tools/github_pages.py
@@ -1,3 +1,4 @@
+import json
 from pathlib import Path
 
 
@@ -18,23 +19,24 @@ def main():
             <h2><a href="htmlcov/index.html">Coverage report</a></h2>
             <h2>JSON schema</h2>
             <ul>
+            <schema_list>
+            </ul>
+        </body>
+    </html>
     """
 
-    json_schemas = [
-        path.name
+    schema_paths = [
+        path
         for path in Path("_site").iterdir()
         if path.is_file() and path.suffix == ".json"
     ]
-    json_schemas.sort()
-    for schema in json_schemas:
-        html += f"<li><a href='{schema}'>{schema}</a></li>"
-
-    html += """
-        </ul>
-        </body>
-    </html>
-    """
-
+    schema_paths.sort(key=lambda p: p.name)
+    schema_list_html = ""
+    for schema_path in schema_paths:
+        with schema_path.open(encoding="utf-8") as f:
+            schema_data = json.load(f)
+            schema_list_html += f"<li><a href='{schema_path.name}'>{schema_data.get('title')}</a></li>"
+    html = html.replace("<schema_list>", schema_list_html)
     with open("_site/index.html", "w", encoding="utf-8") as f:
         f.write(html)
 

From 4620fc82b00f270daa02f6ed35881ed4e1ef7e2b Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Thu, 7 Dec 2023 10:36:55 +0800
Subject: [PATCH 5/5] Add commit link that triggered the action

---
 .github/workflows/test.yml |  2 +-
 Makefile                   |  5 ++++-
 tools/github_pages.py      | 11 +++++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e463410f..89da35f1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -35,7 +35,7 @@ jobs:
           PYTHONWARNINGS: default
       - run: |
           make coverage_report
-          make github_pages
+          make github_pages REPO=${{ github.repository }} SHA=${{ github.sha }}
         if: github.ref_name == 'master' && matrix.python-version == '3.12'
       - uses: actions/upload-pages-artifact@v2
         if: github.ref_name == 'master' && matrix.python-version == '3.12'
diff --git a/Makefile b/Makefile
index 5d52f200..7ca5dfcb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,8 @@
 # Run "make test" to run tests
 # Run "make clean" to remove automatically generated files
+# REPO and SHA select the commit that the "make github_pages" index links to
+REPO ?= tatuylonen/wiktextract
+SHA ?= HEAD
 test:
 	python -m unittest discover -b -s tests
 test_coverage:
@@ -11,7 +14,7 @@ coverage_report:
 github_pages:
 	python tools/generate_schema.py
 	cp json_schema/*.json _site
-	python tools/github_pages.py
+	python tools/github_pages.py '$(REPO)' '$(SHA)'
 clean:
 	python -m coverage erase
 	rm -rf __pycache__ _site
diff --git a/tools/github_pages.py b/tools/github_pages.py
index 967eb38a..64720749 100644
--- a/tools/github_pages.py
+++ b/tools/github_pages.py
@@ -1,3 +1,4 @@
+import argparse
 import json
 from pathlib import Path
 
@@ -6,6 +7,11 @@ def main():
     """
     Generate a simple HTML page to list files in the `_site` folder.
     """
+    parser = argparse.ArgumentParser(description=main.__doc__)
+    parser.add_argument("repo", help="The repository as owner/name.")
+    parser.add_argument("sha", help="The commit SHA or ref (e.g. HEAD) to link.")
+    args = parser.parse_args()
+
     html = """
     <!DOCTYPE HTML>
     <html lang="en-US">
@@ -21,6 +27,7 @@ def main():
             <ul>
             <schema_list>
             </ul>
+            <commit_sha>
         </body>
     </html>
     """
@@ -37,6 +44,10 @@ def main():
             schema_data = json.load(f)
             schema_list_html += f"<li><a href='{schema_path.name}'>{schema_data.get('title')}</a></li>"
     html = html.replace("<schema_list>", schema_list_html)
+
+    commit_sha = f"<p>Commit: <a href='https://github.com/{args.repo}/commit/{args.sha}'>{args.sha[:7]}</a></p>"
+    html = html.replace("<commit_sha>", commit_sha)
+
     with open("_site/index.html", "w", encoding="utf-8") as f:
         f.write(html)