Skip to content

Commit

Permalink
Merge pull request #110 from zamazaljiri/codecs_remove_accent_errors_…
Browse files Browse the repository at this point in the history
…handler

Added unicode error handler for remove accent
  • Loading branch information
matllubos authored Feb 1, 2021
2 parents 1f8375e + 58c29a1 commit fbce89f
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 1 deletion.
15 changes: 15 additions & 0 deletions chamber/patch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import codecs

from django.db.models import Model
from django.db.models.fields import Field

from chamber.utils import remove_accent


class OptionsLazy:

Expand Down Expand Up @@ -62,6 +66,17 @@ def humanize(val, inst, *args, **kwargs):
getattr(self, '_init_chamber_patch_')(*args, **kwargs)


def remove_accent_errors(exception):
    """
    Implement the 'remove_accent' codec error handler (for encoding with text encodings only).

    The part of the input that the codec could not encode is passed through ``remove_accent`` and the result
    is returned as the replacement, so the unencodable character is replaced by a character without accent.

    :param exception: the ``UnicodeEncodeError`` (or ``UnicodeTranslateError``) raised by the codec; its
        ``object``, ``start`` and ``end`` attributes locate the offending part of the input.
    :return: tuple ``(replacement, position)`` where ``position`` is the index at which the codec continues.
    :raises TypeError: if the handler receives any other error, e.g. a ``UnicodeDecodeError`` whose input
        is ``bytes`` and therefore has no accents to remove.
    """
    # Same convention as the built-in encode-only handlers (e.g. 'xmlcharrefreplace'): refuse errors this
    # handler was not designed for instead of handing a non-text object to remove_accent.
    if not isinstance(exception, (UnicodeEncodeError, UnicodeTranslateError)):
        raise TypeError(
            "don't know how to handle {} in error callback".format(type(exception).__name__)
        )
    chunk = exception.object[exception.start:exception.end]
    # Resume right after the offending span; the codec encodes the returned replacement itself.
    return remove_accent(chunk), exception.end


# Patch Django's Field class: give every field a class-level ``default_humanized`` attribute (None unless
# overridden) and replace its constructor with ``field_init``.
Field.default_humanized = None
# The original constructor must be stored before it is overwritten on the next line, otherwise it is lost.
Field._init_chamber_patch_ = Field.__init__ # pylint: disable=W0212
Field.__init__ = field_init

# Register the handler under the name 'remove_accent' so it can be passed as the ``errors`` argument,
# e.g. codecs.encode(text, 'windows-1250', 'remove_accent').
codecs.register_error('remove_accent', remove_accent_errors)
14 changes: 13 additions & 1 deletion example/dj/apps/test_chamber/tests/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import codecs

from django.test import TestCase
from django.utils.functional import cached_property
from django.utils.safestring import SafeData, mark_safe
Expand Down Expand Up @@ -28,7 +30,17 @@ def cached_property_method(self):
class UtilsTestCase(TestCase):

def test_should_remove_accent_from_string(self):
# Checks that remove_accent maps each accented letter of the input to its plain counterpart.
# NOTE(review): the two assertions below differ only in argument order; they look like the removed and the
# added side of the same diff line - confirm that only the second one is meant to remain.
assert_equal('escrzyaie', remove_accent('ěščřžýáíé'))
assert_equal(remove_accent('ěščřžýáíé'), 'escrzyaie')

def test_should_remove_accent_from_string_when_unicode_error(self):
    """Encoding with the 'remove_accent' error handler replaces unencodable characters by plain ones."""
    charset = 'windows-1250'

    # Characters "à" "À" are not in charset windows-1250, so only they are expected to be rewritten;
    # "á" and "Á" must come through unchanged.
    partly_encodable = codecs.encode('àaáÀAÁ', charset, 'remove_accent')
    assert_equal(partly_encodable, 'aaáAAÁ'.encode(charset))

    # Every circled letter is expected to fall back to its plain letter; the hyphen is kept as is.
    circled = codecs.encode('ⓓⓙⓐⓝⓖⓞ-ⓒⓗⓐⓜⓑⓔⓡ', charset, 'remove_accent')
    assert_equal(circled, 'django-chamber'.encode(charset))

classes_and_method_names = [
[TestClass.method, TestClass, 'method'],
Expand Down

0 comments on commit fbce89f

Please sign in to comment.