diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f05ac76b..d410cca5 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -32,6 +32,7 @@ jobs:
       - name: init playwright nbconvert-a11y
         run: |
           playwright install --with-deps chromium
+          npm install vnu-jar
           pip install -e.
           doit copy
       - name: test with pytest
diff --git a/test-environment.yml b/test-environment.yml
index 417d31c1..3ef48c81 100644
--- a/test-environment.yml
+++ b/test-environment.yml
@@ -7,7 +7,6 @@ dependencies:
   - openjdk
   - pip
   - pip:
-    - html5validator
     - pytest-playwright
     - nbval
     - accessible-pygments
@@ -15,6 +14,7 @@ dependencies:
     - markdown-it-py[plugins,linkify]
     - python-slugify
     - html5lib
+    - nodejs
     - playwright
     - nbconvert
     - pytest
diff --git a/tests/test_w3c.py b/tests/test_w3c.py
new file mode 100644
index 00000000..c7103a09
--- /dev/null
+++ b/tests/test_w3c.py
@@ -0,0 +1,117 @@
+"""Validate exported notebooks with the Nu Html Checker (vnu).
+
+requires node: the ``vnu-jar`` npm package provides the validator jar.
+requires jvm: the jar is run with ``java -jar``.
+"""
+
+import collections
+import dataclasses
+import functools
+import itertools
+import json
+import operator
+import pathlib
+import re
+import shlex
+import subprocess
+from json import dumps, loads
+from logging import getLogger
+from pathlib import Path
+
+import exceptiongroup
+from pytest import fixture, mark
+
+from test_nbconvert_html5 import exporter
+
+HERE = Path(__file__).parent
+NOTEBOOKS = HERE / "notebooks"
+EXPORTS = HERE / "exports"
+HTML = EXPORTS / "html"
+LOGGER = getLogger(__name__)
+VALIDATOR = EXPORTS / "validator"
+
+# validator message that is not treated as a failure, see
+# https://github.com/validator/validator/issues/1125
+EXCLUDE = re.compile(
+    """or with a “role” attribute whose value is “table”, “grid”, or “treegrid”.$"""
+)
+
+
+@functools.lru_cache(1)
+def vnu_jar():
+    """Return the path to ``vnu.jar`` from the npm-installed ``vnu-jar`` package.
+
+    The npm lookup runs once; later calls reuse the cached path.
+    """
+    node_modules = subprocess.check_output(["npm", "root", "vnu-jar"]).strip().decode()
+    jar = pathlib.Path(node_modules) / "vnu-jar/build/dist/vnu.jar"
+    assert jar.exists(), f"{jar} is missing, run `npm install vnu-jar`"
+    return jar
+
+
+def validate_html(*files: pathlib.Path) -> dict:
+    """Run the nu validator on ``files`` and return its parsed JSON report.
+
+    The command is an argument list rather than a split string so that paths
+    containing spaces reach ``java`` as single arguments.
+    """
+    command = ["java", "-jar", str(vnu_jar())]
+    command += ["--stdout", "--format", "json", "--exit-zero-always"]
+    command += [str(file) for file in files]
+    return json.loads(subprocess.check_output(command).decode())
+
+
+def organize_validator_results(results):
+    """Group validator messages as ``{type: {message: [items]}}``."""
+    collect = collections.defaultdict(functools.partial(collections.defaultdict, list))
+    for item in results["messages"]:
+        collect[item["type"]][item["message"]].append(item)
+    return collect
+
+
+def raise_if_errors(results, exclude=EXCLUDE):
+    """Raise an ``ExceptionGroup`` if ``results`` holds validator errors.
+
+    Error messages matching ``exclude`` are skipped; a falsy ``exclude`` keeps
+    every error. Each remaining message becomes a nested group holding one
+    exception per occurrence.
+    """
+    exceptions = []
+    for msg, items in organize_validator_results(results)["error"].items():
+        if exclude and exclude.search(msg):
+            continue
+        # fall back to the message text when an item has no "extract" key
+        found = [Exception(item.get("extract", msg)) for item in items]
+        exceptions.append(exceptiongroup.ExceptionGroup(msg, found))
+    if exceptions:
+        raise exceptiongroup.ExceptionGroup("nu validator errors", exceptions)
+
+
+# it would be possible to test loaded baseline documents with playwright.
+# export the resting state document and pass it to the validator.
+# this would better validate widgets.
+
+
+@mark.parametrize(
+    "notebook",
+    sorted(
+        x
+        for x in NOTEBOOKS.glob("*.ipynb")
+        if x.name not in {"Imaging_Sky_Background_Estimation.ipynb"}
+    ),
+)
+def test_baseline_w3c(page, exporter, notebook):
+    """Export ``notebook`` to HTML, validate it and fail on validator errors."""
+    target = HTML / notebook.with_suffix(".html").name
+    target.parent.mkdir(exist_ok=True, parents=True)
+    # write UTF-8 explicitly instead of relying on the platform default encoding
+    target.write_text(exporter.from_filename(notebook)[0], encoding="utf-8")
+
+    result = validate_html(target)
+    VALIDATOR.mkdir(parents=True, exist_ok=True)
+    audit = VALIDATOR / notebook.with_suffix(".json").name
+    # the validator report lists its findings under "messages"
+    LOGGER.info("writing %s with %d violations", audit, len(result.get("messages", [])))
+    audit.write_text(dumps(result))
+
+    raise_if_errors(result)