Skip to content

Commit

Permalink
packages: fix up parse_accept_language()
Browse files Browse the repository at this point in the history
Make several fixes to our handling of the `Accept-Language:` header:

 - we used to crash if `q=` contained invalid values due to an unchecked
   cast with `float()`.  Catch that error and ignore the entry in that
   case.

 - our handling of English was incorrect.  We would look for and fail to
   find po.en.js, and move on to the next item in the list.  That means
   if the user listed English first, followed by another language which
   we did support, they'd see Cockpit in that other language, which is
   unexpected.  This is a regression introduced by f4be906.  Now we
   drop all items that sort after English.

 - our handling of fallbacks (ie: 'de' from 'de-de') was incorrect.
   RFC4647 §3.4 says that a "Lookup" should consider items in the order
   they're found, stripping each item down to its base form, before
   considering the next item.  This passes a gut check, as well: a user
   who lists `de-de, nl` probably expects to see a German translation
   before a Dutch one.

We also now mark the parser code as `@lru_cache`.  This makes sense:
within a given run of cockpit-bridge, we're likely to see very many
copies of the Accept-Language header, and they're practically always
going to be identical.  Make sure we now accept and return immutable
types to prevent weird side-effects.  We also take the time to remove
duplicate items from the list.

While we're at it, up our testing game a bit to make sure we don't
mistreat requests for 'English' in the future.

Closes #19526
  • Loading branch information
allisonkarlitskaya committed Oct 26, 2023
1 parent b74151b commit b6564b2
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 33 deletions.
74 changes: 47 additions & 27 deletions src/cockpit/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,28 +62,55 @@
logger = logging.getLogger(__name__)


def parse_accept_language(headers: JsonObject) -> List[str]:
# In practice, this is going to get called over and over again with exactly the
# same list. Let's try to cache the result.
@functools.lru_cache()
def parse_accept_language(accept_language: str) -> Sequence[str]:
"""Parse the Accept-Language header, if it exists.
Returns an ordered list of languages.
Returns an ordered list of languages, with fallbacks inserted, and
truncated to the position where 'en' would have otherwise appeared, if
applicable.
https://tools.ietf.org/html/rfc7231#section-5.3.5
https://datatracker.ietf.org/doc/html/rfc4647#section-3.4
"""

locales = []
for language in get_str(headers, 'Accept-Language', '').split(','):
language = language.strip()
locale, _, weightstr = language.partition(';q=')
weight = float(weightstr or 1)

# Skip possible empty locales
if not locale:
continue

# Locales are case-insensitive and we store our list in lowercase
locales.append((locale.lower(), weight))

return [locale for locale, _ in sorted(locales, key=lambda k: k[1], reverse=True)]
logger.debug('parse_accept_language(%r)', accept_language)
locales_with_q = []
for entry in accept_language.split(','):
entry = entry.strip().lower()
logger.debug(' entry %r', entry)
locale, _, qstr = entry.partition(';q=')
try:
q = float(qstr or 1.0)
except ValueError:
continue # ignore malformed entry

while locale:
logger.debug(' adding %r q=%r', locale, q)
locales_with_q.append((locale, q))
# strip off '-detail' suffixes until there's nothing left
locale, _, _region = locale.rpartition('-')

# Sort the list by highest q value. Otherwise, this is a stable sort.
locales_with_q.sort(key=lambda pair: pair[1], reverse=True)
logger.debug(' sorted list is %r', locales_with_q)

# If we have 'en' anywhere in our list, ignore it and all items after it.
# This will result in us getting an untranslated (ie: English) version if
# none of the more-preferred languages are found, which is what we want.
# We also take the chance to drop duplicate items. Note: both of these
# things need to happen after sorting.
results = []
for locale, _q in locales_with_q:
if locale == 'en':
break
if locale not in results:
results.append(locale)

logger.debug(' results list is %r', results)
return tuple(results)


def sortify_version(version: str) -> str:
Expand Down Expand Up @@ -280,22 +307,15 @@ def load_file(self, filename: str) -> Document:

return Document(path.open('rb'), content_type, content_encoding, content_security_policy)

def load_translation(self, path: str, locales: List[str]) -> Document:
def load_translation(self, path: str, locales: Sequence[str]) -> Document:
self.ensure_scanned()
assert self.translations is not None

# First check the locales that the user sent
# First match wins
for locale in locales:
with contextlib.suppress(KeyError):
return self.load_file(self.translations[path][locale])

# Next, check the language-only versions of variant-specified locales
for locale in locales:
language, _, region = locale.partition('-')
if region:
with contextlib.suppress(KeyError):
return self.load_file(self.translations[path][language])

# We prefer to return an empty document than 404 in order to avoid
# errors in the console when a translation can't be found
return Document(io.BytesIO(), 'text/javascript')
Expand All @@ -306,7 +326,7 @@ def load_path(self, path: str, headers: JsonObject) -> Document:
assert self.translations is not None

if path in self.translations:
locales = parse_accept_language(headers)
locales = parse_accept_language(get_str(headers, 'Accept-Language', ''))
return self.load_translation(path, locales)
else:
return self.load_file(self.files[path])
Expand Down Expand Up @@ -500,7 +520,7 @@ def load_manifests_js(self, headers: JsonObject) -> Document:
chunks: List[bytes] = []

# Send the translations required for the manifest files, from each package
locales = parse_accept_language(headers)
locales = parse_accept_language(get_str(headers, 'Accept-Language', ''))
for name, package in self.packages.items():
if name in ['static', 'base1']:
continue
Expand Down
63 changes: 57 additions & 6 deletions test/pytest/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,33 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import json
from typing import List

import pytest

from cockpit.packages import Packages, parse_accept_language


@pytest.mark.parametrize(("test_input", "expected"), [
('de-at, zh-CH, en,', ['de-at', 'zh-ch', 'en']),
('es-es, nl;q=0.8, fr;q=0.9', ['es-es', 'fr', 'nl']),
('fr-CH, fr;q=0.9, en;q=0.8, de;q=0.7, *;q=0.5', ['fr-ch', 'fr', 'en', 'de', '*'])
# correct handles empty values
('', ()),
(' ', ()),
(' , ', ()),
(' , ,xx', ('xx',)),
# english → empty list
('en', ()),
(' , en', ()),
# invalid q values get ignored
('aa;q===,bb;q=abc,cc;q=.,zz', ('zz',)),
# variant-peeling works
('aa-bb-cc-dd,ee-ff-gg-hh', ('aa-bb-cc-dd', 'aa-bb-cc', 'aa-bb', 'aa', 'ee-ff-gg-hh', 'ee-ff-gg', 'ee-ff', 'ee')),
# sorting and english-truncation are working
('fr-ch;q=0.8,es-mx;q=1.0,en-ca;q=0.9', ('es-mx', 'es', 'en-ca')),
('de-at, zh-CN, en,', ('de-at', 'de', 'zh-cn', 'zh')),
('es-es, nl;q=0.8, fr;q=0.9', ('es-es', 'es', 'fr', 'nl')),
('fr-CH, fr;q=0.9, en;q=0.8, de;q=0.7, *;q=0.5', ('fr-ch', 'fr'))
])
def test_parse_accept_language(test_input: str, expected: List[str]) -> None:
assert parse_accept_language({'Accept-Language': test_input}) == expected
def test_parse_accept_language(test_input: str, expected: 'tuple[str]') -> None:
assert parse_accept_language(test_input) == expected


@pytest.fixture
Expand Down Expand Up @@ -160,6 +173,44 @@ def test_condition_hides_priority(pkgdir):
assert packages.packages['basic'].priority == 1


def test_english_translation(pkgdir):
make_package(pkgdir, 'one')
(pkgdir / 'one' / 'po.de.js').write_text('eins')

packages = Packages()

# make sure we get German
document = packages.load_path('/one/po.js', {'Accept-Language': 'de'})
assert '/javascript' in document.content_type
assert document.data.read() == b'eins'

# make sure we get German here (higher q-value) even with English first
document = packages.load_path('/one/po.js', {'Accept-Language': 'en;q=0.9, de-ch'})
assert '/javascript' in document.content_type
assert document.data.read() == b'eins'

# make sure we get the empty ("English") translation, and not German
document = packages.load_path('/one/po.js', {'Accept-Language': 'en, de'})
assert '/javascript' in document.content_type
assert document.data.read() == b''

document = packages.load_path('/one/po.js', {'Accept-Language': 'de;q=0.9, fr;q=0.7, en'})
assert '/javascript' in document.content_type
assert document.data.read() == b''

document = packages.load_path('/one/po.js', {'Accept-Language': 'de;q=0.9, fr, en-ca'})
assert '/javascript' in document.content_type
assert document.data.read() == b''

document = packages.load_path('/one/po.js', {'Accept-Language': ''})
assert '/javascript' in document.content_type
assert document.data.read() == b''

document = packages.load_path('/one/po.js', {})
assert '/javascript' in document.content_type
assert document.data.read() == b''


def test_translation(pkgdir):
# old style: make sure po.de.js is served as fallback for manifest translations
make_package(pkgdir, 'one')
Expand Down

0 comments on commit b6564b2

Please sign in to comment.