Skip to content

Commit

Permalink
Fetch entity imagery from Wikipedia (#1139)
Browse files Browse the repository at this point in the history
  • Loading branch information
bartfeenstra committed Jan 8, 2024
1 parent a8f2efe commit 7be2b0d
Show file tree
Hide file tree
Showing 16 changed files with 392 additions and 283 deletions.
9 changes: 8 additions & 1 deletion betty/app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import TYPE_CHECKING, Mapping, Self, final

import aiohttp
from aiohttp import ClientTimeout
from reactives.instance import ReactiveInstance
from reactives.instance.property import reactive_property

Expand Down Expand Up @@ -332,7 +333,13 @@ def json_encoder(self) -> type[JSONEncoder]:
@reactive_property(on_trigger_delete=True)
def http_client(self) -> aiohttp.ClientSession:
if not self._http_client:
self._http_client = aiohttp.ClientSession(connector=aiohttp.TCPConnector(limit_per_host=5))
self._http_client = aiohttp.ClientSession(
timeout=ClientTimeout(9),
connector=aiohttp.TCPConnector(limit_per_host=5),
headers={
'User-Agent': f'Betty (https://github.com/bartfeenstra/betty) on behalf of {self._project.configuration.base_url}{self._project.configuration.root_path}',
},
)
weakref.finalize(self, sync(self._http_client.close))
return self._http_client

Expand Down
8 changes: 7 additions & 1 deletion betty/assets/betty.pot
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: Betty VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-01-03 16:22+0000\n"
"POT-Creation-Date: 2024-01-08 19:21+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
Expand Down Expand Up @@ -316,6 +316,9 @@ msgstr ""
msgid "Descendant names include"
msgstr ""

msgid "Description, licensing, and image history"
msgstr ""

msgid "Determine if people can be proven to have died. If not, mark them and their associated entities private."
msgstr ""

Expand Down Expand Up @@ -389,6 +392,9 @@ msgstr ""
msgid "Files"
msgstr ""

msgid "Find out more about this image on Wikimedia Commons."
msgstr ""

msgid "Follow Betty on <a href=\"https://twitter.com/Betty_Project\">Twitter</a> and <a href=\"https://github.com/bartfeenstra/betty\">Github</a>."
msgstr ""

Expand Down
12 changes: 9 additions & 3 deletions betty/assets/locale/fr-FR/betty.po
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-01-03 16:22+0000\n"
"POT-Creation-Date: 2024-01-08 19:21+0000\n"
"PO-Revision-Date: 2020-11-27 19:49+0100\n"
"Last-Translator: \n"
"Language: fr\n"
Expand Down Expand Up @@ -381,6 +381,9 @@ msgstr ""
msgid "Descendant names include"
msgstr "Les noms de descendants incluent"

msgid "Description, licensing, and image history"
msgstr ""

msgid ""
"Determine if people can be proven to have died. If not, mark them and "
"their associated entities private."
Expand Down Expand Up @@ -462,6 +465,9 @@ msgstr ""
msgid "Files"
msgstr ""

msgid "Find out more about this image on Wikimedia Commons."
msgstr ""

msgid ""
"Follow Betty on <a href=\"https://twitter.com/Betty_Project\">Twitter</a>"
" and <a href=\"https://github.com/bartfeenstra/betty\">Github</a>."
Expand Down Expand Up @@ -882,8 +888,8 @@ msgstr ""

msgid "This person's name is unavailable to protect their privacy."
msgstr ""
"Le nom de cette personne n'est pas disponibles afin de "
"protéger sa vie privée."
"Le nom de cette personne n'est pas disponible afin de protéger sa vie "
"privée."

msgid "This person's name is unknown."
msgstr "Le nom de la personne est inconnu."
Expand Down
8 changes: 7 additions & 1 deletion betty/assets/locale/nl-NL/betty.po
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-01-03 16:22+0000\n"
"POT-Creation-Date: 2024-01-08 19:21+0000\n"
"PO-Revision-Date: 2022-04-08 01:58+0100\n"
"Last-Translator: \n"
"Language: nl\n"
Expand Down Expand Up @@ -410,6 +410,9 @@ msgstr "Afleiden..."
msgid "Descendant names include"
msgstr "De nakomelingen heten"

msgid "Description, licensing, and image history"
msgstr "Beschrijving, licentie, en afbeeldingsgeschiedenis"

msgid ""
"Determine if people can be proven to have died. If not, mark them and "
"their associated entities private."
Expand Down Expand Up @@ -491,6 +494,9 @@ msgstr "Bestandspad"
msgid "Files"
msgstr "Bestanden"

msgid "Find out more about this image on Wikimedia Commons."
msgstr "Vind meer informatie over deze afbeelding op Wikimedia Commons."

msgid ""
"Follow Betty on <a href=\"https://twitter.com/Betty_Project\">Twitter</a>"
" and <a href=\"https://github.com/bartfeenstra/betty\">Github</a>."
Expand Down
8 changes: 7 additions & 1 deletion betty/assets/locale/uk/betty.po
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: Betty VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-01-03 16:22+0000\n"
"POT-Creation-Date: 2024-01-08 19:21+0000\n"
"PO-Revision-Date: 2020-05-02 22:29+0100\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: uk\n"
Expand Down Expand Up @@ -382,6 +382,9 @@ msgstr ""
msgid "Descendant names include"
msgstr "Імена нащадків є"

msgid "Description, licensing, and image history"
msgstr ""

msgid ""
"Determine if people can be proven to have died. If not, mark them and "
"their associated entities private."
Expand Down Expand Up @@ -463,6 +466,9 @@ msgstr ""
msgid "Files"
msgstr ""

msgid "Find out more about this image on Wikimedia Commons."
msgstr ""

msgid ""
"Follow Betty on <a href=\"https://twitter.com/Betty_Project\">Twitter</a>"
" and <a href=\"https://github.com/bartfeenstra/betty\">Github</a>."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@

.search-result-preview .image {
border: 1px #eee solid;
height: 45px;
width: 45px;
}

.search-result-file-type {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,8 @@

{% set events = (places + [place]) | unique | map(attribute='events') | flatten | select('public') | rejectattr('date', 'none') | selectattr('date.comparable') | list %}
{% include 'timeline.html.j2' %}

{% with files = place.associated_files %}
{% include 'media.html.j2' %}
{% endwith %}
{% endblock %}
Original file line number Diff line number Diff line change
@@ -1,16 +1 @@
{% set embedded=True %}
<a href="{{ entity | url }}" class="nav-secondary-action search-result-target">
<div class="search-result-description">
{% include 'entity/label--person.html.j2' %}
{% include 'entity/meta--person.html.j2' %}
</div>
{% set files = entity.files | select('public') | list %}
{% if files | length > 0 %}
{% set file = files | first %}
{% if file.media_type and file.media_type.type == 'image' %}
<div class="search-result-preview">
<img src="{{ file | image(45, 45) | static_url }}" class="image"{% if file.description %} alt="{{ file.description }}"{% endif %}/>
</div>
{% endif %}
{% endif %}
</a>
{% include 'search/result-with-image.html.j2' %}
Original file line number Diff line number Diff line change
@@ -1,7 +1 @@
{% set embedded=True %}
<a href="{{ entity | url }}" class="nav-secondary-action search-result-target">
<div class="search-result-description">
{% include 'entity/label--place.html.j2' %}
{% include 'entity/meta--place.html.j2' %}
</div>
</a>
{% include 'search/result-with-image.html.j2' %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{% set embedded=True %}
<a href="{{ entity | url }}" class="nav-secondary-action search-result-target">
<div class="search-result-description">
{% include ['entity/label--' + (entity | entity_type_name | camel_case_to_kebab_case) + '.html.j2', 'entity/label.html.j2'] %}
{% include ['entity/meta--' + (entity | entity_type_name | camel_case_to_kebab_case) + '.html.j2', 'entity/meta.html.j2'] ignore missing %}
</div>
{% set files = entity.files | select('public') | list %}
{% if files | length > 0 %}
{% set file = files | first %}
{% if file.media_type and file.media_type.type == 'image' %}
<div class="search-result-preview">
<img src="{{ file | image(45, 45) | static_url }}" class="image"{% if file.description %} alt="{{ file.description }}"{% endif %}/>
</div>
{% endif %}
{% endif %}
</a>
33 changes: 12 additions & 21 deletions betty/extension/wikipedia/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,54 +8,45 @@
from reactives.instance import ReactiveInstance
from reactives.instance.property import reactive_property

from betty import wikipedia
from betty.app.extension import UserFacingExtension
from betty.asyncio import gather
from betty.jinja2 import Jinja2Provider, context_localizer
from betty.load import PostLoader
from betty.locale import negotiate_locale, Str
from betty.model.ancestry import Link
from betty.wikipedia import _Retriever, _Populator, Entry, _parse_url, NotAnEntryError, RetrievalError
from betty.wikipedia import Summary, _parse_url, NotAPageError, RetrievalError


class _Wikipedia(UserFacingExtension, Jinja2Provider, PostLoader, ReactiveInstance):
def __init__(self, *args: Any, **kwargs: Any):
super().__init__(*args, **kwargs)
self.__retriever: _Retriever | None = None
self.__populator: _Populator | None = None
self.__retriever: wikipedia._Retriever | None = None
self.__populator: wikipedia._Populator | None = None

async def post_load(self) -> None:
await self._populator.populate()
populator = wikipedia._Populator(self.app, self._retriever)
await populator.populate()

@property
@reactive_property(on_trigger_delete=True)
def _retriever(self) -> _Retriever:
def _retriever(self) -> wikipedia._Retriever:
if self.__retriever is None:
self.__retriever = _Retriever(self.app.http_client, self.cache_directory_path)
self.__retriever = wikipedia._Retriever(self.app.http_client, self.cache_directory_path)
return self.__retriever

@_retriever.deleter
def _retriever(self) -> None:
self.__retriever = None

@property
@reactive_property(on_trigger_delete=True)
def _populator(self) -> _Populator:
if self.__populator is None:
self.__populator = _Populator(self.app, self._retriever)
return self.__populator

@_populator.deleter
def _populator(self) -> None:
self.__populator = None

@property
def filters(self) -> dict[str, Callable[..., Any]]:
return {
'wikipedia': self._filter_wikipedia_links,
}

@pass_context
async def _filter_wikipedia_links(self, context: Context, links: Iterable[Link]) -> Iterable[Entry]:
async def _filter_wikipedia_links(self, context: Context, links: Iterable[Link]) -> Iterable[Summary]:
return filter(
None,
await gather(*(
Expand All @@ -68,15 +59,15 @@ async def _filter_wikipedia_links(self, context: Context, links: Iterable[Link])
)),
)

async def _filter_wikipedia_link(self, locale: str, link: Link) -> Entry | None:
async def _filter_wikipedia_link(self, locale: str, link: Link) -> Summary | None:
try:
entry_language, entry_name = _parse_url(link.url)
except NotAnEntryError:
except NotAPageError:
return None
if negotiate_locale(locale, {entry_language}) is None:
return None
try:
return await self._retriever.get_entry(entry_language, entry_name)
return await self._retriever.get_summary(entry_language, entry_name)
except RetrievalError:
return None

Expand Down
24 changes: 14 additions & 10 deletions betty/jinja2.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,8 @@ async def _filter_image(

if file.media_type:
if file.media_type.type == 'image':
if 'svg+xml' == file.media_type.subtype:
return await _filter_file(context, file)
task_callable = _execute_filter_image_image
destination_name += file.path.suffix
elif file.media_type.type == 'application' and file.media_type.subtype == 'pdf':
Expand All @@ -558,33 +560,34 @@ async def _filter_image(
task_id = f'filter_image:{file.id}:{width or ""}:{height or ""}'
if task_context is None or task_context.claim(task_id):
cache_directory_path = CACHE_DIRECTORY_PATH / 'image'
await task_callable(file.path, cache_directory_path, file_directory_path, destination_name, width, height)
await task_callable(file, cache_directory_path, file_directory_path, destination_name, width, height)

destination_public_path = '/file/%s' % destination_name

return destination_public_path


async def _execute_filter_image_image(
file_path: Path,
file: File,
cache_directory_path: Path,
destination_directory_path: Path,
destination_name: str,
width: int | None,
height: int | None,
) -> None:
assert file.media_type
with warnings.catch_warnings():
# Ignore warnings about decompression bombs, because we know where the files come from.
warnings.simplefilter('ignore', category=DecompressionBombWarning)
image = Image.open(file_path)
image = Image.open(file.path, formats=[file.media_type.subtype])
try:
await _execute_filter_image(image, file_path, cache_directory_path, destination_directory_path, destination_name, width, height)
await _execute_filter_image(image, file, cache_directory_path, destination_directory_path, destination_name, width, height)
finally:
image.close()


async def _execute_filter_image_application_pdf(
file_path: Path,
file: File,
cache_directory_path: Path,
destination_directory_path: Path,
destination_name: str,
Expand All @@ -594,24 +597,25 @@ async def _execute_filter_image_application_pdf(
with warnings.catch_warnings():
# Ignore warnings about decompression bombs, because we know where the files come from.
warnings.simplefilter('ignore', category=DecompressionBombWarning)
image = convert_from_path(file_path, fmt='jpeg')[0]
image = convert_from_path(file.path, fmt='jpeg')[0]
try:
await _execute_filter_image(image, file_path, cache_directory_path, destination_directory_path, destination_name, width, height)
await _execute_filter_image(image, file, cache_directory_path, destination_directory_path, destination_name, width, height)
finally:
image.close()


async def _execute_filter_image(
image: Image,
file_path: Path,
file: File,
cache_directory_path: Path,
destination_directory_path: Path,
destination_name: str,
width: int | None,
height: int | None,
) -> None:
assert file.media_type
await makedirs(destination_directory_path, exist_ok=True)
cache_file_path = cache_directory_path / ('%s-%s' % (hashfile(file_path), destination_name))
cache_file_path = cache_directory_path / ('%s-%s' % (hashfile(file.path), destination_name))
destination_file_path = destination_directory_path / destination_name

try:
Expand All @@ -632,7 +636,7 @@ async def _execute_filter_image(
converted = _resizeimage.resize_height(image, height)
else:
raise ValueError('Width and height cannot both be None.')
converted.save(cache_file_path)
converted.save(cache_file_path, format=file.media_type.subtype)
await makedirs(destination_directory_path, exist_ok=True)
await link_or_copy(cache_file_path, destination_file_path)

Expand Down
Loading

0 comments on commit 7be2b0d

Please sign in to comment.