From e12fd14ea2a02008f6e68a240e50ecd36665585b Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Tue, 30 Jan 2024 03:57:49 +1100 Subject: [PATCH] Add initial vale configuration and apply editorial changes to documentation (#3567) * Fix editorial issues raised by Vale * Add vale configuration * Prepare vale pre-commit hook * Add .vale/justfile to help local development of styles * Generate DAG docs for Vale editorial change * Configure easier to use just recipes * Revert unnecessary lint recipe changes * Update Vale config documentation * Always run using local Vale docker image and pin Vale package versions * Prevent CI docker issue * Check all markdown files, not just documentation site * Prevent false positives for term casing * Check admonition blocks * Include mdx as markdown * Add .vale to CODEOWNERS * Address vale errors for files added during rebase --- .github/CODEOWNERS | 1 + .github/filters.yml | 1 + .github/workflows/ci_cd.yml | 2 +- .pre-commit-config.yaml | 10 +++ .vale/.gitignore | 3 + .vale/.vale.ini | 43 +++++++++++++ .vale/Dockerfile | 9 +++ .vale/README.md | 29 +++++++++ .vale/justfile | 64 +++++++++++++++++++ .vale/styles/Openverse/README.md | 8 +++ .vale/styles/Openverse/TermCasing.yml | 21 ++++++ .vale/styles/Vocab/Openverse/accept.txt | 3 + .../python/workflows/set_matrix_images.py | 1 + .../provider_api_scripts/inaturalist.py | 2 +- catalog/docs/data_models.md | 8 ++- catalog/docs/provider_data_ingester_faq.md | 6 +- .../api/reference/search_algorithm.md | 15 +++-- .../catalog/guides/adding_a_new_provider.md | 5 ++ documentation/catalog/guides/quickstart.md | 7 +- documentation/catalog/reference/DAGs.md | 6 +- .../frontend/reference/playwright_tests.md | 8 +-- .../frontend/reference/storybook_tests.md | 9 ++- documentation/general/contributing.md | 12 ++-- documentation/general/general_setup.md | 6 +- documentation/general/logging.md | 2 +- documentation/general/quickstart.md | 6 +- 
documentation/general/test.md | 4 +- .../zero_downtime_database_management.md | 17 ++--- .../ingestion_server/guides/troubleshoot.md | 6 +- documentation/meta/ci_cd/artifacts.md | 4 +- documentation/meta/ci_cd/flow.md | 10 +-- .../meta/ci_cd/proof_of_functionality.md | 2 +- documentation/meta/contribution/codespell.md | 6 +- .../meta/maintenance/elasticsearch_cluster.md | 9 +-- .../meta/monitoring/cloudwatch_logs/index.md | 16 ++--- .../runbooks/unhealthy_ecs_hosts.md | 2 - ...ntifying-and-blocking-traffic-anomalies.md | 4 +- documentation/projects/planning.md | 2 +- .../yearly_planning/process_outline.md | 12 ++-- frontend/nuxt-template-overrides/README.md | 2 +- .../components/VModal/meta/VModal.stories.mdx | 4 +- justfile | 1 + 42 files changed, 298 insertions(+), 90 deletions(-) create mode 100644 .vale/.gitignore create mode 100644 .vale/.vale.ini create mode 100644 .vale/Dockerfile create mode 100644 .vale/README.md create mode 100644 .vale/justfile create mode 100644 .vale/styles/Openverse/README.md create mode 100644 .vale/styles/Openverse/TermCasing.yml create mode 100644 .vale/styles/Vocab/Openverse/accept.txt diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index dd6660b7a0f..22ae3e35fc3 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -25,6 +25,7 @@ tsconfig.base.json @WordPress/openverse-frontend # Specific assignments for the 'openverse-maintainers' group .codespell/ @WordPress/openverse-maintainers +.vale/ @WordPress/openverse-maintainers .devcontainer/ @WordPress/openverse-maintainers .github/ @WordPress/openverse-maintainers automations/ @WordPress/openverse-maintainers diff --git a/.github/filters.yml b/.github/filters.yml index 52378c5b24b..a94b5b272f5 100644 --- a/.github/filters.yml +++ b/.github/filters.yml @@ -60,6 +60,7 @@ lint: - .pre-commit-config.yaml - tsconfig.base.json - .codespell + - .vale mgmt: - .github/** - .devcontainer/** diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index 
f5334d6e7a7..be431952b32 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -219,7 +219,7 @@ jobs: uses: docker/build-push-action@v5 with: context: ${{ matrix.context }} - target: ${{ matrix.target }} + target: ${{ matrix.target || '' }} push: false tags: openverse-${{ matrix.image }} file: ${{ matrix.file }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9778d0ccac7..cc81ea04be2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -110,6 +110,7 @@ repos: entry: bash -c 'pnpm run -r types' language: system pass_filenames: false + - id: eslint name: eslint files: (frontend|automations|packages).*?\.(js|ts|vue|json5|json)$ @@ -117,6 +118,7 @@ repos: language: system pass_filenames: false entry: bash -c 'pnpm run eslint --fix' + - id: test:unit name: test:unit files: ^(frontend|packages)/.*$ @@ -125,6 +127,7 @@ repos: pass_filenames: false stages: - push + - id: render-release-drafter name: render-release-drafter files: ^templates/.*$ @@ -132,6 +135,13 @@ repos: language: system pass_filenames: false + - id: vale + name: vale + language: system + entry: bash -c 'just .vale/run' + pass_filenames: false + files: (\.vale/.*|\.mdx?)$ + - repo: https://github.com/renovatebot/pre-commit-hooks rev: 37.116.0 hooks: diff --git a/.vale/.gitignore b/.vale/.gitignore new file mode 100644 index 00000000000..f8cea4a9562 --- /dev/null +++ b/.vale/.gitignore @@ -0,0 +1,3 @@ +styles/* +!styles/Vocab/ +!styles/Openverse/ diff --git a/.vale/.vale.ini b/.vale/.vale.ini new file mode 100644 index 00000000000..f3be33fd614 --- /dev/null +++ b/.vale/.vale.ini @@ -0,0 +1,43 @@ +StylesPath = styles + +MinAlertLevel = suggestion +Vocab = Openverse +# Using an explicit reference to downloads for package zips rather than the shorthand allows us to +# pin the package to a specific version. Otherwise, changes in the upstream style configuration can +# cause sudden and unexpected failures during linting. 
+Packages = https://github.com/errata-ai/proselint/releases/download/v0.3.3/proselint.zip + +# The default settings also ignore `pre` and `code`, which includes admonition code blocks. +# We may encounter false positives by including code blocks, but we use admonitions +# so frequently in Openverse documentation that it is probably worth it to include them. +SkippedScopes = script, style, figure +IgnoredScopes = tt + +[formats] +# Treat mdx as markdown (for Storybook support) +mdx = md + +# Only configure Markdown, because that's the only language we use to write documentation in +# (MDX files used for Storybook are mapped to Markdown in the `[formats]` section above, so they +# are linted as Markdown too) +[*.md] +BasedOnStyles = proselint, Vale, Openverse + +# Avoid Vale.Spelling for now, but we do want Vale.Terms for the vocab at least +Vale.Spelling = NO + +# The suggested terms are obscure +proselint.AnimalLabels = NO + +# These are rarely necessary changes and can be more fiddly than they are helpful +proselint.Typography = NO + +# proselint.Needless considers a host of terms "needless" which are natural and wide-spread changes in the English language, +# and is clearly favouring US changes rather than others (the US isn't the only or even the largest English speaking country). +# You don't have to look far to find examples of this rule preferring variants of terms in line with 18th century elite preferences: +# https://www.merriam-webster.com/grammar/preventive-or-preventative +# In other words, it's a linguistic prescriptive holdover to insist on some of these terms being "needless" when they're in +# fact widespread and well understood. Not all the terms have this issue, but we'd have to rewrite +# practically the entire rule to sort out which ones we cared about. If we find any we _do_ care about, we can add them to our +# own substitution rule. 
+proselint.Needless = NO diff --git a/.vale/Dockerfile b/.vale/Dockerfile new file mode 100644 index 00000000000..9bd1fd58e0c --- /dev/null +++ b/.vale/Dockerfile @@ -0,0 +1,9 @@ +FROM jdkato/vale:v2.30.0 + +WORKDIR /vale +COPY .vale.ini . +COPY styles styles + +RUN vale sync + +ENTRYPOINT ["vale", "--config=/vale/.vale.ini"] diff --git a/.vale/README.md b/.vale/README.md new file mode 100644 index 00000000000..f00bf6f26ed --- /dev/null +++ b/.vale/README.md @@ -0,0 +1,29 @@ +# Openverse Vale configuration + +Openverse runs Vale using Docker. This bypasses the need for contributors to +install the Vale binary on their computers. It also prevents Vale styles getting +downloaded into the repository in clear text, which is critical to avoid lists +of sensitive terms being accidentally too-easily available. + +For more information about this motivation to avoid lists of sensitive terms in +the Openverse monorepo, refer to the README at +[WordPress/openverse-sensitive-terms](https://github.com/WordPress/openverse-sensitive-terms). + +To run Vale with Openverse's configuration, use the just recipe: + +``` +$ just .vale/run +``` + +This recipe _always_ builds Vale. The Openverse Vale docker image is fast to +build, and the most expensive steps are cached. Docker will automatically reuse +the pre-existing image unless there are changes to the Vale configuration. + +Typically it is unnecessary to run Vale directly, as pre-commit automatically +runs Vale on each commit. You only need to run Vale directly when iterating on +changes to Openverse's Vale configuration. + +Refer to the `_IGNORE_PATTERNS` variable [in the justfile](./justfile) to +identify which files we currently exclude from Vale. A comment on the variable +explains the rationale for that choice. The list of checked files will ideally +expand in the future to include all textual content in the repository. 
diff --git a/.vale/justfile b/.vale/justfile new file mode 100644 index 00000000000..f191286503e --- /dev/null +++ b/.vale/justfile @@ -0,0 +1,64 @@ +COLOR := "\\033[0;34m" +NO_COLOR := "\\033[0m" + + +# Show all available recipes +@_default: + printf "\n{{ COLOR }}# Vale (path: \`.vale/\`)\n" + printf "===================={{ NO_COLOR }}\n" + just --list --unsorted + + +# Build a local version of `openverse-vale:local` for testing +build: + docker build . -t openverse-vale:local + + +# The --glob setting excludes things that should not be linted, including +# build artefacts, dependencies, and automatically generated files like +# the changelogs (without excluding `changelogs/index.md`). Project proposals +# are also excluded for want of a good way to ignore existing proposals and only +# lint _new_ proposals. Project proposals aren't "living documents" in the way +# the rest of our documentation is, so it doesn't seem right to retroactively +# edit them for mere editorial purposes (rather than, for example, to correct +# some grave inaccuracy). +# Leading `,` compensates for lack of trailing comma support +# Note the lack of space before the trailing \ on each line; this is to prevent +# the addition of a space between each pattern, which isn't supported by Vale's glob +# If you change this and Vale takes a very long time to run (more than 30 seconds) +# then chances are the change is breaking the glob pattern. Unfortunately the only +# feedback you get when the glob pattern is not working as intended is a very long +# run time for Vale as it checks everything that should have otherwise been ignored, +# but wasn't due to some minor issue in the pattern syntax. +# This is fiddly, but I can't find a good way around it. 
+_IGNORE_PATTERNS := """ + _build\ + ,_static\ + ,venv\ + ,.venv\ + ,.nuxt\ + ,.pnpm-store\ + ,node_modules\ + ,test-results\ + ,storybook-static\ + ,.ruff-cache\ + ,projects/proposals\ + ,changelogs/api\ + ,changelogs/frontend\ + ,changelogs/catalog\ + ,changelogs/ingestion_server\ +""" + +VALE_GLOB := "--glob='!*{" + _IGNORE_PATTERNS + "}*'" + + +# Run Vale configured for Openverse in Docker. +# Using Docker avoids the need for contributors to install the Vale binary. +# Configuration defaults to what is used for CI. +run +files=".": build + docker run --rm \ + -v $PWD/..:/src:rw,Z \ + --workdir=/src \ + openverse-vale:local \ + {{ files }} \ + {{ VALE_GLOB }} diff --git a/.vale/styles/Openverse/README.md b/.vale/styles/Openverse/README.md new file mode 100644 index 00000000000..1133ae799ed --- /dev/null +++ b/.vale/styles/Openverse/README.md @@ -0,0 +1,8 @@ +# Openverse vale style + +The Openverse vale style is manually curated by Openverse maintainers. These +rules are primarily meant to cater to exceptions in the Openverse documentation +style that do not fit into pre-existing style guides, whether because of +disagreements in style or different institutional or contextual requirements +based on Openverse's domain and nature as a FOSS project within the WordPress +community. diff --git a/.vale/styles/Openverse/TermCasing.yml b/.vale/styles/Openverse/TermCasing.yml new file mode 100644 index 00000000000..90ecadc2eb8 --- /dev/null +++ b/.vale/styles/Openverse/TermCasing.yml @@ -0,0 +1,21 @@ +# While Vale.Terms could be used for these, it is too inflexible, and thus incapable +# of ignoring common false-positives like GitHub usernames or GitHub team names +extends: substitution +message: "Incorrect casing. Use '%s' instead of '%s'." +level: error +ignorecase: false +scope: + - paragraph +swap: + # [^/\.] 
prevents matching things that look like URLs, file paths, or GitHub team mentions + # For example: @WordPress/openverse-maintainers + '[^/\.]openverse[^.\.]': Openverse + # OpenVerse should never be used, except as an example of something that is always wrong, + # in which case we'll tell Vale to ignore that line. + "OpenVerse": Openverse + '[^/\.]wordpress[^.\.]': WordPress + # Wordpress is the same as OpenVerse + "Wordpress": WordPress + '[^/\.]github[^.\.]': GitHub + # Github is the same as Wordpress and OpenVerse + "Github": GitHub diff --git a/.vale/styles/Vocab/Openverse/accept.txt b/.vale/styles/Vocab/Openverse/accept.txt new file mode 100644 index 00000000000..91d4a023bae --- /dev/null +++ b/.vale/styles/Vocab/Openverse/accept.txt @@ -0,0 +1,3 @@ +# This matches "cliche" rulesets, but we actually use this term in earnest and with clear explanation of what we mean by it +Decision-Making Process +decision-making process diff --git a/automations/python/workflows/set_matrix_images.py b/automations/python/workflows/set_matrix_images.py index cbaddce1511..acc4e4cf1ff 100644 --- a/automations/python/workflows/set_matrix_images.py +++ b/automations/python/workflows/set_matrix_images.py @@ -55,6 +55,7 @@ def ser_set(x): build_matrix["image"] |= {"frontend", "frontend_nginx"} publish_matrix["image"] |= {"frontend", "frontend_nginx"} + build_matrix["include"] = [includes[item] for item in build_matrix["image"]] for item in build_matrix["include"]: diff --git a/catalog/dags/providers/provider_api_scripts/inaturalist.py b/catalog/dags/providers/provider_api_scripts/inaturalist.py index 532ba0f92d5..81443cf2f6a 100644 --- a/catalog/dags/providers/provider_api_scripts/inaturalist.py +++ b/catalog/dags/providers/provider_api_scripts/inaturalist.py @@ -7,7 +7,7 @@ https://api.inaturalist.org/v1/docs/ But there is a full dump intended for sharing on S3. 
https://github.com/inaturalist/inaturalist-open-data/tree/documentation/Metadata - Because these are very large normalized tables, as opposed to more document + Because these are exceptionally large normalized tables, as opposed to more document oriented API responses, we found that bringing the data into postgres first was the most effective approach. More detail in slack here: https://wordpress.slack.com/archives/C02012JB00N/p1653145643080479?thread_ts=1653082292.714469&cid=C02012JB00N diff --git a/catalog/docs/data_models.md b/catalog/docs/data_models.md index cc1e78cf2b2..96d16085dc6 100644 --- a/catalog/docs/data_models.md +++ b/catalog/docs/data_models.md @@ -1,6 +1,8 @@ -__ +> **Note** +> +> This documentation is temporary and should be replaced by more thorough +> documentation of our DB fields in +> https://github.com/WordPress/openverse/issues/412. # Data Models diff --git a/catalog/docs/provider_data_ingester_faq.md b/catalog/docs/provider_data_ingester_faq.md index b6848faab7d..64a9ea541d3 100644 --- a/catalog/docs/provider_data_ingester_faq.md +++ b/catalog/docs/provider_data_ingester_faq.md @@ -63,8 +63,10 @@ def get_record_data(self, data: dict) -> dict | list[dict] | None: ... ``` -**NOTE**: When doing this, keep in mind that adding too many requests may slow -down ingestion. Be aware of rate limits from your provider API as well. +> **Note** +> +> When doing this, keep in mind that adding too many requests may slow down +> ingestion. Be aware of rate limits from your provider API as well. ## What if my API endpoint isn't static and needs to change from one request to another? 
diff --git a/documentation/api/reference/search_algorithm.md b/documentation/api/reference/search_algorithm.md index 1287be1c808..6358b2f33b7 100644 --- a/documentation/api/reference/search_algorithm.md +++ b/documentation/api/reference/search_algorithm.md @@ -1,12 +1,13 @@ # Search Algorithm Openverse currently uses a relatively simple and naïve search algorithm with -very limited options. The documentation on this page was written by referencing -the code in Openverse as well as parts of Openverse's historical development. -Parts of the story for how Openverse's indexes came to be configured as they are -today are likely missing. Future improvements to Openverse's indexing and search -will be more carefully documented here and in the code to ensure there is -greater longevity of understanding. +restricted modifications to the default Elasticsearch behaviour. The +documentation on this page was written by referencing the code in Openverse as +well as parts of Openverse's historical development. Parts of the story for how +Openverse's indexes came to be configured as they are today are likely missing. +Future improvements to Openverse's indexing and search will be more carefully +documented here and in the code to ensure there is greater longevity of +understanding. > **Note**: This document avoids covering details covered in the > [Openverse Search Guide](https://wordpress.org/openverse/search-help). @@ -176,7 +177,7 @@ aspects of a document: > to potentially change this fact. Of these, title is weighted 10000 times more heavily than the description and -tags. This makes searches that match a title very closely rise to the "top" of +tags. This makes works whose titles closely match the query rise to the "top" of the results, even if the same text is present word-for-word in a description. 
It also breaks ties between documents, if, for example, two documents are returned, one because the title matches and one because a tag matches, the title-matched diff --git a/documentation/catalog/guides/adding_a_new_provider.md b/documentation/catalog/guides/adding_a_new_provider.md index 141db8f5e94..ef6f8e9a815 100644 --- a/documentation/catalog/guides/adding_a_new_provider.md +++ b/documentation/catalog/guides/adding_a_new_provider.md @@ -78,6 +78,9 @@ that can be used to generate the files you'll need and get you started: You should see output similar to this: + + + ``` Creating files in /Users/staci/projects/openverse-projects/openverse API script: openverse/catalog/dags/providers/provider_api_scripts/foobar_museum.py @@ -86,6 +89,8 @@ API script test: openverse/catalog/tests/dags/providers/provider_api_scripts/t NOTE: You will also need to add a new ProviderWorkflow dataclass configuration to the PROVIDER_WORKFLOWS list in `openverse-catalog/dags/providers/provider_workflows.py`. ``` + + This generates a provider script with a templated `ProviderDataIngester` for you in the [`provider_api_scripts` folder](https://github.com/WordPress/openverse/tree/main/catalog/dags/providers/provider_api_scripts), diff --git a/documentation/catalog/guides/quickstart.md b/documentation/catalog/guides/quickstart.md index cdb2472781f..a195b11a442 100644 --- a/documentation/catalog/guides/quickstart.md +++ b/documentation/catalog/guides/quickstart.md @@ -49,12 +49,17 @@ command runner installed. To set up the local python environment along with the pre-commit hook, run: + + + ```shell python3 -m venv venv source venv/bin/activate just catalog/install ``` + + The containers will be built when starting the stack up for the first time. 
If you'd like to build them prior to that, run: @@ -171,7 +176,7 @@ just down -v `docker volume prune` can also be useful if you've already stopped the running containers, but be warned that it will remove all volumes associated with -stopped containers, not just openverse-catalog ones. +stopped containers, not just catalog ones. To fully recreate everything from the ground up, you can use: diff --git a/documentation/catalog/reference/DAGs.md b/documentation/catalog/reference/DAGs.md index d0d5abaaf00..abdec759661 100644 --- a/documentation/catalog/reference/DAGs.md +++ b/documentation/catalog/reference/DAGs.md @@ -872,9 +872,9 @@ Notes: The iNaturalist API is not intended for data scraping. https://api.inaturalist.org/v1/docs/ But there is a full dump intended for sharing on S3. https://github.com/inaturalist/inaturalist-open-data/tree/documentation/Metadata -Because these are very large normalized tables, as opposed to more document -oriented API responses, we found that bringing the data into postgres first was -the most effective approach. More detail in slack here: +Because these are exceptionally large normalized tables, as opposed to more +document oriented API responses, we found that bringing the data into postgres +first was the most effective approach. 
More detail in slack here: https://wordpress.slack.com/archives/C02012JB00N/p1653145643080479?thread_ts=1653082292.714469&cid=C02012JB00N We use the table structure defined here, https://github.com/inaturalist/inaturalist-open-data/blob/main/Metadata/structure.sql diff --git a/documentation/frontend/reference/playwright_tests.md b/documentation/frontend/reference/playwright_tests.md index 409a55d21c0..3a56ecdffb3 100644 --- a/documentation/frontend/reference/playwright_tests.md +++ b/documentation/frontend/reference/playwright_tests.md @@ -178,10 +178,10 @@ automatically by Playwright and will be placed in the `test-results` folder under the fully qualified name of the test that failed (with every parent describe block included). -Additionally, you can run run the tests in debug mode. This will run the tests -with a headed browser as opposed to a headless (invisible) one and allow you to -watch the test happen in real time. It's not possible for a headed browser to -run inside the docker container, however, so be aware that when debugging the +Additionally, you can run the tests in debug mode. This will run the tests with +a headed browser as opposed to a headless (invisible) one and allow you to watch +the test happen in real time. It's not possible for a headed browser to run +inside the docker container, however, so be aware that when debugging the environment will be slightly different. For example, if you're on any OS other than Linux, the browser you're running will have small differences in how it renders the page compared to the docker container. diff --git a/documentation/frontend/reference/storybook_tests.md b/documentation/frontend/reference/storybook_tests.md index 9502c1e187e..6deb6bc28c7 100644 --- a/documentation/frontend/reference/storybook_tests.md +++ b/documentation/frontend/reference/storybook_tests.md @@ -32,16 +32,15 @@ just frontend/run test:storybook ``` This will run the tests inside a docker container. 
Should you wish to run the -tests locally, you can use the following: +tests locally, you can use the command below. However, please note that unless +you are running the same operating system (down to the distro, in some cases) +there are likely to be visual rendering differences that will cause snapshot +tests to fail on false-positives. ```bash just frontend/run test:storybook:local ``` -But please note that unless you are running the same operating system (down to -the distro, in some cases) there are likely to be visual rendering differences -that will cause snapshot tests to fail on false-positives. - ## Writing tests It is preferable to write tests using the iframed version of the component to diff --git a/documentation/general/contributing.md b/documentation/general/contributing.md index 75e2ee97de1..77cadba1d36 100644 --- a/documentation/general/contributing.md +++ b/documentation/general/contributing.md @@ -119,8 +119,7 @@ in the design library file is constantly being added to and improved, some documentation may be missing. If you have doubts, ask on [#design channel](http://wordpress.slack.com/messages/design/) for clarification. If you discover new information that is yet to be documented, -contributing this information back to the documentation is very much -appreciated. +contributing this information back to the documentation is deeply appreciated. Once you are done and ready to share your idea, [create an issue with the `design` label and fill in the template](https://github.com/WordPress/openverse/issues/new?assignees=&labels=🖼️+aspect%3A+design%2C✨+goal%3A+improvement%2C+🚦+status%3A+awaiting+triage&template=feature_request.md&title=). @@ -139,11 +138,11 @@ discussion evolves before implementation begins. You can also contribute to Openverse by translating it. 
An overview of Openverse translations is here: -[https://translate.wordpress.org/projects/meta/openverse/](https://translate.wordpress.org/projects/meta/openverse/) +<https://translate.wordpress.org/projects/meta/openverse/> A getting started guide for translating on GlotPress (the software behind -[translate.wordpress.org](https://translate.wordpress.org)) is here: -[https://make.wordpress.org/polyglots/handbook/translating/glotpress-translate-wordpress-org/#getting-started](https://make.wordpress.org/polyglots/handbook/translating/glotpress-translate-wordpress-org/#getting-started) +<https://translate.wordpress.org>) is here: +<https://make.wordpress.org/polyglots/handbook/translating/glotpress-translate-wordpress-org/#getting-started> ### Providers @@ -249,8 +248,7 @@ In the course of this work, you may find issues that should be [closed because they are no longer valid](#closing-no-longer-valid-issues). Please see the linked section for how to handle those issues and [keep in mind the important caveat with regard to reproducibility covered in that section](#reproducibility). -See also [bug reproduction reproduction and triage](#bug-reproduction--triage) -above. +See also [bug reproduction and triage](#bug-reproduction--triage) above. #### Merging issues diff --git a/documentation/general/general_setup.md b/documentation/general/general_setup.md index e0222dcb6ed..8155a7a7af1 100644 --- a/documentation/general/general_setup.md +++ b/documentation/general/general_setup.md @@ -266,8 +266,8 @@ different editor if you have a preference. - [VS Code](https://code.visualstudio.com/) is an option with good out-of-the-box support for our entire stack. -- [PyCharm and WebStorm](https://www.jetbrains.com/) are other very popular - options with lots of bells and whistles. +- [PyCharm and WebStorm](https://www.jetbrains.com/) are other popular options + with lots of bells and whistles. 
- [Sublime Text](https://www.sublimetext.com/) is a minimalistic option that can get you off the ground quickly with lots of room for expansion through it's package system. @@ -276,7 +276,7 @@ different editor if you have a preference. 
## Optional development setup -The following setup steps are only needed in very specific scenarios. +The following setup steps are only needed in a narrow set of scenarios. ### coreutils diff --git a/documentation/general/logging.md b/documentation/general/logging.md index f65f56e052e..c3f46e710c3 100644 --- a/documentation/general/logging.md +++ b/documentation/general/logging.md @@ -46,5 +46,5 @@ with these particular requests. Potential future improvements to logging in Openverse could include: -1. Even more uniform data logging format like formatting all logs as JSON. +1. Structured logging format like formatting all logs as JSON. 2. Establishing clearer practices around what log levels to use and when. diff --git a/documentation/general/quickstart.md b/documentation/general/quickstart.md index 31f3f646548..09c77e938c7 100644 --- a/documentation/general/quickstart.md +++ b/documentation/general/quickstart.md @@ -8,8 +8,8 @@ the ingestion server and the frontend. ## Stack-specific quickstarts -It is very unlikely that you want to contribute to everything, everywhere, all -at once. In all likelihood, you intend to contribute to a narrower slice of the +It is unlikely that you want to contribute to everything, everywhere, all at +once. In all likelihood, you intend to contribute to a narrower slice of the stack. In such cases, you might find it more beneficial to go through one of these stack-specific quickstart guides. @@ -18,7 +18,7 @@ these stack-specific quickstart guides. - [Ingestion server](/ingestion_server/guides/quickstart.md) - [Documentation](/meta/documentation/quickstart.md) -That said, there is something very appealing about running the full stack +That said, there is something useful and appealing about running the full stack locally, which this guide is all about. 
## Prerequisites diff --git a/documentation/general/test.md b/documentation/general/test.md index 2b295966351..5785f795dfa 100644 --- a/documentation/general/test.md +++ b/documentation/general/test.md @@ -31,8 +31,8 @@ triage the issue: 1. Create an issue with critical priority documenting the flaky test. Link to failed CI runs or copy/paste relevant logs and output if they exist. The goal of this issue is not to document the solution, merely to document the fact of - the flakiness and to prioritise someone picking the issue up very soon to fix - it. Of course, if you have additional details or ideas, proactively share + the flakiness and to prioritise someone picking the issue up much sooner to + fix it. Of course, if you have additional details or ideas, proactively share those in the issue description or discussion. 1. Open a PR that skips the flaky test with a link to the issue as a comment or in the explanatory note of the skip annotation. This PR should also be marked diff --git a/documentation/general/zero_downtime_database_management.md b/documentation/general/zero_downtime_database_management.md index 72ee6fbc531..edf0c235d31 100644 --- a/documentation/general/zero_downtime_database_management.md +++ b/documentation/general/zero_downtime_database_management.md @@ -175,8 +175,8 @@ sustainable expectation, however, if deployments take a "short" amount of time. What "short" means is up for debate, but an initial benchmark can be the Openverse production frontend deployments, which currently take about 10 minutes. Longer than this seems generally unreasonable to expect someone to keep -a very close eye on the process. Sticking to zero-downtime deployments helps -keep short deployments the norm. Even though it sometimes asks us to deploy more +a close eye on the process. Sticking to zero-downtime deployments helps keep +short deployments the norm. 
Even though it sometimes asks us to deploy more _often_, those deployments can—and in all likelihood, should—be spread over multiple days. This makes the expectation of keeping a close watch on the deployment more sustainable long-term and helps encourage us to deploy more @@ -200,10 +200,11 @@ that the instructions above are followed to prevent the need for downtime, it is reasonable to take alternative approaches to deploying the migration. At the moment we do not have specific recommendations or policies regarding -these hopefully rare instances. If you come across the need for this, please -carefully consider the reasons why it is necessary in the particular case and -document the steps taken to prepare and deploy the migration. Please update this -document with any general findings or advice, as applicable. +these instances, because they have not proven to be common. If you come across +the need for this, please carefully consider the reasons why it is necessary in +the particular case and document the steps taken to prepare and deploy the +migration. Please update this document with any general findings or advice, as +applicable. ## Django management command based data transformations @@ -237,7 +238,7 @@ details: With those two key details in mind, the main deficiency of using migrations for data transformations may already be evident: time. Django migration based data -transformations dealing with certain smaller tables may not take very long and +transformations dealing with certain smaller tables may not take a long time and this issue, in some cases, might not be applicable. However, because it is extremely difficult to predetermine the amount of time a migration will take, even data transformations for small datasets should still heed the @@ -361,7 +362,7 @@ different running versions of the application. 
For environment variables, the previously running version of the application will continue to use the environment variable configuration defined in the previous task definition revision used to run that instance. Environment variables are not automatically -updated for the running application. This means that if a deployment was very +updated for the running application. This means that if a deployment was carefully orchestrated, we do not necessarily need to worry about backwards incompatible changes, and even in the [worst case described below](#manual-rollbacks-after-removing-an-environment-variable-or-updating-its-format), diff --git a/documentation/ingestion_server/guides/troubleshoot.md b/documentation/ingestion_server/guides/troubleshoot.md index 7a50af95bfa..0ff814930d6 100644 --- a/documentation/ingestion_server/guides/troubleshoot.md +++ b/documentation/ingestion_server/guides/troubleshoot.md @@ -37,9 +37,9 @@ the size of a test/staging index. To do so, follow these steps. 4. Repeat steps 2 and 3 for each active ingestion worker machine. Leave the SSH sessions open. -5. Wait for a few minutes and keep an eye on the document count in the - Elasticsearch index that was currently being created. It may increase a - little because of timing effects but should stop after a few minutes. +5. Wait for a few minutes and note the document count in the Elasticsearch index + that was currently being created. It may increase a little because of timing + effects but should stop after a few minutes. 6. From each of the open SSH sessions, send a completion notification to the ingestion server's internal IP address. diff --git a/documentation/meta/ci_cd/artifacts.md b/documentation/meta/ci_cd/artifacts.md index eaa44c5ff82..9273be337b5 100644 --- a/documentation/meta/ci_cd/artifacts.md +++ b/documentation/meta/ci_cd/artifacts.md @@ -7,7 +7,7 @@ the Summary page of a workflow run. 
https://github.com/WordPress/openverse/actions/runs/ ``` -These artifacts are classified into TODO types: +These artifacts are classified into the following types: - Docker images - Logs @@ -37,7 +37,7 @@ The following artifacts are logs for debugging. - `api_logs` This log file contains all the logs from all the Docker services written in - the course of the API tests. This artifact is created by by the + the course of the API tests. This artifact is created by the [`test-api`](/meta/ci_cd/jobs/api.md#test-api) job. - `ing_logs` diff --git a/documentation/meta/ci_cd/flow.md b/documentation/meta/ci_cd/flow.md index 8b2ddbd98e5..001859b0e59 100644 --- a/documentation/meta/ci_cd/flow.md +++ b/documentation/meta/ci_cd/flow.md @@ -1,6 +1,6 @@ # Flow -Since the CI + CD workflow is very complex, to simplify and understand it, we +Since the CI + CD workflow is highly complex, to simplify and understand it, we can assume it to take place across several stages. ```{mermaid} @@ -50,7 +50,7 @@ stage depends. ## Preparation -This is the stage of jobs get executed at the very start of the workflow. Since +This is the stage for jobs that get executed at the start of the workflow. Since they are depended upon by other jobs, once this stage is complete, we can use their resulting state and outputs wherever needed. @@ -107,9 +107,9 @@ flowchart TD The frontend tests run outside the Docker containers, so they don't need to wait for Docker containers to be built. This stage happens in parallel with the -[Docker preparation](#docker-preparation) stage. The Playwright tests are very -time-consuming so this stage extends long enough to be contemporary with the -[Dockerised tests](#dockerised-tests) stage. +[Docker preparation](#docker-preparation) stage. The Playwright tests take much +longer to run than other steps, so it is run simultaneously with the +[Dockerised tests](#dockerised-tests) stage to save time. These tests are only executed if the frontend has changed. 
Else they will be skipped and bypass jobs for `nuxt-build` and `playwright` will run instead. diff --git a/documentation/meta/ci_cd/proof_of_functionality.md b/documentation/meta/ci_cd/proof_of_functionality.md index f37d30f6f8b..5461157ef8e 100644 --- a/documentation/meta/ci_cd/proof_of_functionality.md +++ b/documentation/meta/ci_cd/proof_of_functionality.md @@ -23,7 +23,7 @@ proof-of-functionality for different use-cases. ### Catalog -The tests for the catalog in `test-cat` are very comprehensive and rigorous, +The tests for the catalog in `test-cat` are comprehensive and rigorous, including extended test suites using the `--extended` flag. Passing these tests conclusively proves that the catalog works as intended. diff --git a/documentation/meta/contribution/codespell.md b/documentation/meta/contribution/codespell.md index 0c8edb6fafc..8ec4aec17a7 100644 --- a/documentation/meta/contribution/codespell.md +++ b/documentation/meta/contribution/codespell.md @@ -44,7 +44,7 @@ jobs: steps: - name: amazing step with: - misspelled-action-argment: hello world!!!!! + misspelled-action-argment: hello world! ``` The line in `ignore_lines.txt` must include all eight of the leading whitespace @@ -52,13 +52,13 @@ characters to ignore the misspelled action argument. The following will not work: ``` -misspelled-action-argment: hello world!!!!! +misspelled-action-argment: hello world! ``` It must be: ``` - misspelled-action-argment: hello world!!!!! + misspelled-action-argment: hello world! ``` When documenting ignored lines, please include an explanation and the targeted diff --git a/documentation/meta/maintenance/elasticsearch_cluster.md b/documentation/meta/maintenance/elasticsearch_cluster.md index d1b3bff79b7..ae75f422d90 100644 --- a/documentation/meta/maintenance/elasticsearch_cluster.md +++ b/documentation/meta/maintenance/elasticsearch_cluster.md @@ -82,9 +82,10 @@ Read the release notes for all ES client versions after the current client version. 
**Check `Pipfile.lock` to confirm the current version as it may not necessarily be the version in the `Pipfile`, particularly if it is only minor version constrained (uses `~=X.X`)**. Patch versions shouldn't include breaking -changes, and neither should minor versions, but we're better safe than sorry. If -this is a major version upgrade, you should spend extra time reading through the -first major version release notes. +changes, and neither should minor versions, but it's better to read the release +notes anyway than to find out later that there was an issue we needed to be +aware of ahead of time. If this is a major version upgrade, you should spend +extra time reading through the first major version release notes. [Elastic publishes comprehensive and straightforward client migration documentation for major version releases](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/migration.html): these notes are required reading for upgrading between major versions. @@ -239,7 +240,7 @@ If the client versions were not upgraded in deploy all updated services through to production. Confirm the data refresh works before proceeding to the new step. -#### 10. Celebrate, breathe a sigh of relief, and deprovision the old clusters +#### 10. Celebrate and deprovision the old clusters Congrats! We should now be on the new Elasticsearch version. We can tear down the old clusters and say goodbye to that Elasticsearch version. Be sure to diff --git a/documentation/meta/monitoring/cloudwatch_logs/index.md b/documentation/meta/monitoring/cloudwatch_logs/index.md index 5d143e8becc..70af6f9d130 100644 --- a/documentation/meta/monitoring/cloudwatch_logs/index.md +++ b/documentation/meta/monitoring/cloudwatch_logs/index.md @@ -117,10 +117,10 @@ only match log events from the Django application (or vice-versa). ### Logs Insights -Logs Insights is very powerful. 
It is particular useful for deriving numerical -and statistical data from both structured and unstructured logs, though the -latter require additional parsing. Logs Insights has its own SQL-like query -syntax, +Logs Insights is a powerful tool for querying our logs. It is particularly +useful for deriving numerical and statistical data from both structured and +unstructured logs, though the latter require additional parsing. Logs Insights +has its own SQL-like query syntax, [the documentation for which](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/CWL_QuerySyntax.html) should be the read before attempting to use Logs Insights for anything serious. @@ -239,10 +239,10 @@ time they are executed. You can minimise costs in the following ways: to reduce the number of log lines processed during each iteration. Avoid searching the full relevant data set until you're confident that the query works and extracts the data you expect. -- When trying to find examples of specific logs, use `limit` with a very low - number. If you just need one example, use `limit 1`. If you need multiple, try - to keep the number low. This is especially useful when developing `parse` - statements. +- When trying to find examples of specific logs, use `limit` with a low number. + If you just need one example, use `limit 1`. If you need multiple, try to keep + the number low, for example, less than 25. This is especially useful when + developing `parse` statements.
### Logs are not retained forever diff --git a/documentation/meta/monitoring/runbooks/unhealthy_ecs_hosts.md b/documentation/meta/monitoring/runbooks/unhealthy_ecs_hosts.md index 8dcc939b283..8fd5baa4b24 100644 --- a/documentation/meta/monitoring/runbooks/unhealthy_ecs_hosts.md +++ b/documentation/meta/monitoring/runbooks/unhealthy_ecs_hosts.md @@ -7,8 +7,6 @@ Status: **Stable** Alarm links: - [production-api](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#alarmsV2:alarm/production-api+has+unhealthy+hosts) - [staging-api](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#alarmsV2:alarm/staging-api+has+unhealthy+hosts) -- [productoin-api-thumbnails](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#alarmsV2:alarm/production-api-thumbnails+has+unhealthy+hosts) -- [staging-api-thumbnails](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#alarmsV2:alarm/staging-api-thumbnails+has+unhealthy+hosts) - [production-nuxt](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#alarmsV2:alarm/production-nuxt+has+unhealthy+hosts) - [staging-nuxt](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#alarmsV2:alarm/staging-nuxt+has+unhealthy+hosts) ``` diff --git a/documentation/meta/monitoring/traffic/runbooks/identifying-and-blocking-traffic-anomalies.md b/documentation/meta/monitoring/traffic/runbooks/identifying-and-blocking-traffic-anomalies.md index 810e976be0f..55a3e156132 100644 --- a/documentation/meta/monitoring/traffic/runbooks/identifying-and-blocking-traffic-anomalies.md +++ b/documentation/meta/monitoring/traffic/runbooks/identifying-and-blocking-traffic-anomalies.md @@ -118,8 +118,8 @@ something. 2. Take the list of suspicious traffic source IDs and go to the `Security => Bots` section of Cloudflare. Here, you'll see more lists of IP - Addresses, ASNs, and User Agents below a graph of known bot traffic. 
It's - very likely you'll see items from your lists in these graphs. + Addresses, ASNs, and User Agents below a graph of known bot traffic. It is + likely you'll see items from your lists in these graphs. 3. Use the "add filter" button and filter by your listed resources individually. Write down which of the identified resources are comprised of diff --git a/documentation/projects/planning.md b/documentation/projects/planning.md index cc3b2ed7b97..d8a4038ee0f 100644 --- a/documentation/projects/planning.md +++ b/documentation/projects/planning.md @@ -499,7 +499,7 @@ in the earlier project stages but is likely to be implemented here. ## Managing and working with projects We use a new -[Github Project Board](https://github.com/orgs/WordPress/projects/70/views/1) to +[GitHub Project Board](https://github.com/orgs/WordPress/projects/70/views/1) to track all of the projects in Openverse. This board gives us a single view to see all projects and some additional metadata: diff --git a/documentation/projects/yearly_planning/process_outline.md b/documentation/projects/yearly_planning/process_outline.md index fe3d86d231c..edbca61266f 100644 --- a/documentation/projects/yearly_planning/process_outline.md +++ b/documentation/projects/yearly_planning/process_outline.md @@ -80,9 +80,8 @@ See: [Project Themes](/projects/yearly_planning/project_themes.md) descriptions. Descriptions should define the scope of the project and the benefit. Describing projects on our own, without collaborating right away, helps to identify shared project ideas as well as potentially different - ideas of very similar projects. These "duplicate" descriptions will - uncover assumptions and nuances. Each project idea should have the - following: + ideas of similar projects. These "duplicate" descriptions will uncover + assumptions and nuances. 
Each project idea should have the following: - Title - Summary - Description @@ -108,9 +107,12 @@ See: [Project Themes](/projects/yearly_planning/project_themes.md) - **Example spreadsheet**: [Project Ideas Effort & Impact Voting](https://docs.google.com/spreadsheets/d/1a_xBQvgirYF7Tzmbr5bbfvxaV0RKyBmoJ3FvCVs5zG8/edit?usp=sharing) - **Notes**: + + - Fibonacci's numbers are used here instead of linear sequence: 2, 3, 5, 8, 13 - instead of 1, 2, 3, 4, 5. This helps communicate the big difference between - a "huge project" and a "very small" project. + instead of 1, 2, 3, 4, 5. This helps communicate the big difference between a + "huge project" and a "very small" project. + - Maintainer's _confidence_ when voting is also recorded, on a scale of 1-3. This is useful in the analysis after-the-fact for showing which projects maintainers felt less confident about assessing. diff --git a/frontend/nuxt-template-overrides/README.md b/frontend/nuxt-template-overrides/README.md index 99ed3da0c21..d3a0ad0ad4d 100644 --- a/frontend/nuxt-template-overrides/README.md +++ b/frontend/nuxt-template-overrides/README.md @@ -4,7 +4,7 @@ Due to a CSS ordering bug that we haven't been able to find any other solutions ## Described changes -There are two basic changes we're making all with a single goal: stop the importing of our custom `error.vue` component coming before our static CSS assets. The generated page apparently depends on this specific ordering of file imports, which is far far from ideal. Alas, there doesn't appear to be another clear solution to this problem at the moment, so we're stuck with this hack. +There are two basic changes we're making all with a single goal: stop the importing of our custom `error.vue` component coming before our static CSS assets. The generated page apparently depends on this specific ordering of file imports, which is far from ideal. 
Alas, there doesn't appear to be another clear solution to this problem at the moment, so we're stuck with this hack. To update these files, it's probably easiest to just copy `App.js` and `index.js` from `node_modules/@nuxt/vue-app/template` into this directory and then apply the following transformations. diff --git a/frontend/src/components/VModal/meta/VModal.stories.mdx b/frontend/src/components/VModal/meta/VModal.stories.mdx index d2d22c60dd5..b3e725fb278 100644 --- a/frontend/src/components/VModal/meta/VModal.stories.mdx +++ b/frontend/src/components/VModal/meta/VModal.stories.mdx @@ -34,13 +34,13 @@ export const Template = (args) => ({ {{ visible ? 'Modal open' : 'Modal closed' }}
- This is some modal 1 content, blah blah blah. + This is some modal 1 content, blah etc.
- This is popover content! Woohoo! I'm inside a modal!!! Wow!!! + This is popover content! Woohoo! I'm inside a modal! Wow!
diff --git a/justfile b/justfile index 9f9cfe63f22..b904f90e7f5 100644 --- a/justfile +++ b/justfile @@ -23,6 +23,7 @@ DC_USER := env_var_or_default("DC_USER", "opener") cd automations/python && just cd automations/js && just cd documentation && just + cd .vale && just printf "\nTo run a nested recipe, add the folder path before it, like \`just frontend/install\`.\n" ###########