Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-40847: Enable ingesting technotes (technote.lsst.io-type documents) #150

Merged
merged 11 commits into from
Sep 26, 2023
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repos:
args: [--autofix, --indent=2, '--top-keys=name,doc,type']

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.287
rev: v0.0.290
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

<!-- scriv-insert-here -->

<a id='changelog-0.9.0'></a>
## 0.9.0 (2023-09-26)

### New features

- Added support for ingesting technotes generated with the technote.lsst.io framework. These technotes are built with Sphinx, but embed their metadata in common formats such as Highwire Press and OpenGraph. This new technote format replaces the original technote format; the original technotes are still supported by Ook.

<a id='changelog-0.8.0'></a>
## 0.8.0 (2023-09-06)

Expand Down
2 changes: 2 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def _make_env_vars() -> dict[str, str]:
"OOK_ENABLE_CONSUMER": "false",
"ALGOLIA_APP_ID": "test",
"ALGOLIA_API_KEY": "test",
"OOK_GITHUB_APP_ID": "1234",
"OOK_GITHUB_APP_PRIVATE_KEY": "test",
}


Expand Down
3 changes: 2 additions & 1 deletion requirements/dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ pydantic
pytest
pytest-asyncio
pytest-cov
respx
types-dateparser
types-PyYAML
documenteer[guide] >= 1.0.0a5
documenteer[guide] == 1.0.0a6 # before pydantic 2 migration
autodoc_pydantic
46 changes: 25 additions & 21 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ click==8.1.3
# via
# -c requirements/main.txt
# documenteer
contourpy==1.1.0
contourpy==1.1.1
# via matplotlib
coverage[toml]==7.3.0
coverage[toml]==7.3.1
# via
# -r requirements/dev.in
# pytest-cov
Expand All @@ -57,28 +57,30 @@ docutils==0.20.1
# pybtex-docutils
# pydata-sphinx-theme
# sphinx
# sphinx-prompt
# sphinxcontrib-bibtex
filelock==3.12.3
filelock==3.12.4
# via virtualenv
fonttools==4.42.1
# via matplotlib
gitdb==4.0.10
# via gitpython
gitpython==3.1.34
gitpython==3.1.37
# via documenteer
h11==0.14.0
# via
# -c requirements/main.txt
# httpcore
httpcore==0.17.3
httpcore==0.18.0
# via
# -c requirements/main.txt
# httpx
httpx==0.24.1
httpx==0.25.0
# via
# -c requirements/main.txt
# -r requirements/dev.in
identify==2.5.27
# respx
identify==2.5.29
# via pre-commit
idna==3.4
# via
Expand All @@ -95,7 +97,7 @@ jinja2==3.1.2
# myst-parser
# sphinx
# sphinxcontrib-redoc
jsonschema==4.19.0
jsonschema==4.19.1
# via sphinxcontrib-redoc
jsonschema-specifications==2023.7.1
# via jsonschema
Expand All @@ -112,7 +114,7 @@ markdown-it-py[linkify]==3.0.0
# myst-parser
markupsafe==2.1.3
# via jinja2
matplotlib==3.7.2
matplotlib==3.8.0
# via sphinxext-opengraph
mdit-py-plugins==0.4.0
# via myst-parser
Expand All @@ -126,7 +128,7 @@ myst-parser==2.0.0
# via documenteer
nodeenv==1.8.0
# via pre-commit
numpy==1.25.2
numpy==1.26.0
# via
# contourpy
# matplotlib
Expand All @@ -137,7 +139,7 @@ packaging==23.1
# pydata-sphinx-theme
# pytest
# sphinx
pillow==10.0.0
pillow==10.0.1
# via matplotlib
platformdirs==3.10.0
# via virtualenv
Expand All @@ -164,9 +166,9 @@ pygments==2.16.1
# pydata-sphinx-theme
# sphinx
# sphinx-prompt
pyparsing==3.0.9
pyparsing==3.1.1
# via matplotlib
pytest==7.4.1
pytest==7.4.2
# via
# -r requirements/dev.in
# pytest-asyncio
Expand Down Expand Up @@ -196,7 +198,9 @@ requests==2.31.0
# -c requirements/main.txt
# documenteer
# sphinx
rpds-py==0.10.2
respx==0.20.2
# via -r requirements/dev.in
rpds-py==0.10.3
# via
# jsonschema
# referencing
Expand All @@ -207,7 +211,7 @@ six==1.16.0
# pybtex
# python-dateutil
# sphinxcontrib-redoc
smmap==5.0.0
smmap==5.0.1
# via gitdb
sniffio==1.3.0
# via
Expand All @@ -220,7 +224,7 @@ snowballstemmer==2.2.0
# via sphinx
soupsieve==2.5
# via beautifulsoup4
sphinx==7.2.5
sphinx==7.2.6
# via
# autodoc-pydantic
# documenteer
Expand Down Expand Up @@ -248,7 +252,7 @@ sphinx-copybutton==0.5.2
# via documenteer
sphinx-design==0.5.0
# via documenteer
sphinx-prompt==1.5.0
sphinx-prompt==1.8.0
# via documenteer
sphinxcontrib-applehelp==1.0.7
# via sphinx
Expand Down Expand Up @@ -276,20 +280,20 @@ types-dateparser==1.1.4.10
# via -r requirements/dev.in
types-pyyaml==6.0.12.11
# via -r requirements/dev.in
typing-extensions==4.7.1
typing-extensions==4.8.0
# via
# -c requirements/main.txt
# mypy
# pydantic
uc-micro-py==1.0.2
# via linkify-it-py
urllib3==2.0.4
urllib3==2.0.5
# via
# -c requirements/main.txt
# requests
virtualenv==20.24.4
virtualenv==20.24.5
# via pre-commit

# The following packages are considered to be unsafe in a requirements file:
setuptools==68.1.2
setuptools==68.2.2
# via nodeenv
3 changes: 2 additions & 1 deletion requirements/main.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ uvicorn[standard]
# Other dependencies.
aiokafka
pydantic < 2.0.0
safir>=4.3.0
safir>=4.3.0,<5.0.0
algoliasearch>=3.0,<4.0
# kafkit[pydantic,httpx,aiokafka] @ git+https://github.com/lsst-sqre/kafkit.git@tickets/DM-39646
kafkit[pydantic,httpx,aiokafka]>=1.0.0a1
dataclasses-avroschema<0.51.0 # before Pydantic 2 migration
click<8.1.4 # see https://github.com/pallets/click/issues/2558
dateparser
lxml
Expand Down
21 changes: 12 additions & 9 deletions requirements/main.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,20 @@ charset-normalizer==3.2.0
click==8.1.3
# via
# -r requirements/main.in
# safir
# uvicorn
cryptography==41.0.3
cryptography==41.0.4
# via
# pyjwt
# safir
cssselect==1.2.0
# via -r requirements/main.in
dacite==1.8.1
# via dataclasses-avroschema
dataclasses-avroschema[pydantic]==0.47.2
# via kafkit
dataclasses-avroschema[pydantic]==0.50.2
# via
# -r requirements/main.in
# kafkit
dateparser==1.1.8
# via -r requirements/main.in
dnspython==2.4.2
Expand All @@ -64,7 +67,7 @@ fastapi==0.103.1
# via
# -r requirements/main.in
# safir
fastavro==1.8.2
fastavro==1.8.3
# via
# dataclasses-avroschema
# kafkit
Expand All @@ -74,11 +77,11 @@ h11==0.14.0
# via
# httpcore
# uvicorn
httpcore==0.17.3
httpcore==0.18.0
# via httpx
httptools==0.6.0
# via uvicorn
httpx==0.24.1
httpx==0.25.0
# via
# kafkit
# safir
Expand Down Expand Up @@ -126,7 +129,7 @@ regex==2023.8.8
# via dateparser
requests==2.31.0
# via algoliasearch
safir==4.3.1
safir==4.5.0
# via -r requirements/main.in
six==1.16.0
# via python-dateutil
Expand All @@ -142,7 +145,7 @@ starlette==0.27.0
# safir
structlog==23.1.0
# via safir
typing-extensions==4.7.1
typing-extensions==4.8.0
# via
# aiohttp
# fastapi
Expand All @@ -153,7 +156,7 @@ uritemplate==4.1.1
# via
# gidgethub
# kafkit
urllib3==2.0.4
urllib3==2.0.5
# via requests
uvicorn[standard]==0.23.2
# via -r requirements/main.in
Expand Down
8 changes: 8 additions & 0 deletions src/ook/domain/algoliarecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
class DocumentSourceType(str, Enum):
"""Types of content that can be classified by Ook."""

LTD_TECHNOTE = "ltd_technote"
"""A technote (technote.lsst.io) that is hosted on LSST the Docs."""

LTD_LANDER_JSONLD = "ltd_lander_jsonld"
"""A lander-based site for PDF-based content that includes a
``metadata.jsonld`` file and is hosted on LSST the Docs.
Expand Down Expand Up @@ -206,6 +209,11 @@ def export_for_algolia(self) -> dict[str, Any]:
"""Export into a dict that can be uploaded to Algolia."""
return self.dict(by_alias=True, exclude_none=True)

@property
def headers(self) -> list[str | None]:
    """The headers of the document.

    The list always has six entries, holding the ``h1`` through ``h6``
    attributes of this record in that order. Per the return annotation,
    an entry may be `None`.
    """
    return [self.h1, self.h2, self.h3, self.h4, self.h5, self.h6]


def generate_surrogate_key() -> str:
"""Generate a surrogate key that applies to all records for a given
Expand Down
6 changes: 3 additions & 3 deletions src/ook/domain/kafka.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"UrlIngestKeyV1",
"LtdEditionV1",
"LtdProjectV1",
"LtdUrlIngestV1",
"LtdUrlIngestV2",
]


Expand Down Expand Up @@ -61,7 +61,7 @@ class LtdProjectV1(AvroBaseModel):
slug: str = Field(..., description="The slug of the project.")


class LtdUrlIngestV1(AvroBaseModel):
class LtdUrlIngestV2(AvroBaseModel):
"""Kafka message value model for a request to ingest a URL hosted on
LSST the Docs.

Expand Down Expand Up @@ -91,4 +91,4 @@ class Meta:
"""Metadata for the model."""

namespace = "lsst.square-events.ook"
schema_name = "ltd_url_ingest_v1"
schema_name = "ltd_url_ingest_v2"
Loading