diff --git a/docs/changelog.rst b/docs/changelog.rst
index f600df3b12..4ecc5145f8 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -5,6 +5,10 @@ CHANGELOG
2.101.5+dev (XXXX-XX-XX)
------------------------
+**Improvements**
+
+- Allow Apidae Trek parser to handle traces that are not encoded in UTF-8
+
**Documentation**
- Improve performance in spatial intersection (zoning district and zoning city) for sql views (#3600)
@@ -5529,4 +5533,4 @@ Installation script
* Fix regex for RAISE NOTICE (fixes #673)
* Initial public version
-See project history in `docs/history.rst` (French).
\ No newline at end of file
+See project history in `docs/history.rst` (French).
diff --git a/geotrek/common/utils/file_infos.py b/geotrek/common/utils/file_infos.py
new file mode 100644
index 0000000000..5d2ef185ef
--- /dev/null
+++ b/geotrek/common/utils/file_infos.py
@@ -0,0 +1,25 @@
+from chardet.universaldetector import UniversalDetector
+
+
+def get_encoding_file(file_name):
+    """Return the character encoding that chardet detects for a file.
+
+    :param file_name: path of the file to inspect; it is read as raw bytes.
+    :return: the ``"encoding"`` entry of the detector result, e.g. ``"utf-8"``,
+        ``"ascii"`` or ``"ISO-8859-1"``. NOTE(review): chardet documents that
+        this entry is ``None`` when no guess can be made - callers should be
+        ready for it.
+    """
+    detector = UniversalDetector()
+    detector.reset()
+    # Use a context manager so the file handle is closed even when the loop
+    # stops early or feeding raises.
+    with open(file_name, 'rb') as source:
+        for line in source:
+            detector.feed(line)
+            # `done` is set once the detector has reached a verdict; the rest
+            # of the file does not need to be read.
+            if detector.done:
+                break
+    detector.close()
+    return detector.result["encoding"]
diff --git a/geotrek/trekking/parsers.py b/geotrek/trekking/parsers.py
index 5ce1b8a46f..8e80294af4 100644
--- a/geotrek/trekking/parsers.py
+++ b/geotrek/trekking/parsers.py
@@ -3,9 +3,11 @@
import re
import zipfile
from collections import defaultdict
-from datetime import date, timedelta
+from datetime import date, timedelta, datetime
from decimal import Decimal
from tempfile import NamedTemporaryFile
+import codecs
+import os
from django.conf import settings
from django.contrib.gis.gdal import DataSource
@@ -14,6 +16,7 @@
from django.utils.translation import gettext as _
from modeltranslation.utils import build_localized_fieldname
+from geotrek.common.utils.file_infos import get_encoding_file
from geotrek.common.models import Label, Theme
from geotrek.common.parsers import (ApidaeBaseParser, AttachmentParserMixin,
GeotrekParser, GlobalImportError, Parser,
@@ -903,7 +906,9 @@ def _get_geom_from_gpx(data):
with NamedTemporaryFile(mode='w+b', dir=settings.TMP_DIR) as ntf:
ntf.write(data)
ntf.flush()
- ds = DataSource(ntf.name)
+
+ file_path = ApidaeTrekParser._maybe_fix_encoding_to_utf8(ntf.name)
+ ds = DataSource(file_path)
for layer_name in ('tracks', 'routes'):
layer = ApidaeTrekParser._get_layer(ds, layer_name)
geos = ApidaeTrekParser._maybe_get_linestring_from_layer(layer)
@@ -912,6 +917,48 @@ def _get_geom_from_gpx(data):
geos.transform(settings.SRID)
return geos
+    @staticmethod
+    def _maybe_fix_encoding_to_utf8(file_name):
+        """Re-encode the file at `file_name` to UTF-8, in place, when needed.
+
+        The encoding is guessed with `get_encoding_file`. When the guess is
+        neither UTF-8 nor ASCII, the content is decoded with it, written as
+        UTF-8 to a scratch file in `settings.TMP_DIR`, and the scratch file
+        then replaces `file_name`.
+
+        Returns `file_name` in every case, so callers can hand the result
+        straight to `DataSource`.
+
+        NOTE(review): only the bytes are converted. If the document carries
+        an XML declaration naming its original encoding, that declaration is
+        left untouched - confirm that the GPX/KML readers cope with it.
+        """
+        encoding = get_encoding_file(file_name)
+
+        # chardet reports None when it cannot guess (no usable data): there
+        # is nothing to decode with, so leave the file alone. ASCII content
+        # is already valid UTF-8 byte for byte, so it needs no rewrite.
+        if encoding is None or encoding.lower() in ("utf-8", "ascii"):
+            return file_name
+
+        tmp_file_path = os.path.join(settings.TMP_DIR, 'fileNameTmp_' + str(datetime.now().timestamp()))
+        # NOTE(review): 9_048_576 is presumably a typo for 1_048_576 (1 MiB);
+        # either value only sets the read chunk size - confirm and tidy up.
+        BLOCKSIZE = 9_048_576
+        try:
+            with codecs.open(file_name, "r", encoding) as source_file:
+                with codecs.open(tmp_file_path, "w", "utf-8") as target_file:
+                    # read() returns "" at end of file, which stops iter().
+                    for contents in iter(lambda: source_file.read(BLOCKSIZE), ""):
+                        target_file.write(contents)
+            os.replace(tmp_file_path, file_name)
+        finally:
+            # After a successful replace the scratch path is gone; it only
+            # still exists when decoding or writing failed part-way.
+            if os.path.exists(tmp_file_path):
+                os.remove(tmp_file_path)
+        return file_name
+
@staticmethod
def _get_geom_from_kml(data):
"""Given KML data as bytes it returns a geom."""
@@ -935,7 +958,9 @@ def get_first_geom_with_type_in(types, geoms):
with NamedTemporaryFile(mode='w+b', dir=settings.TMP_DIR) as ntf:
ntf.write(data)
ntf.flush()
- ds = DataSource(ntf.name)
+
+ file_path = ApidaeTrekParser._maybe_fix_encoding_to_utf8(ntf.name)
+ ds = DataSource(file_path)
geos = get_geos_linestring(ds)
geos.transform(settings.SRID)
return geos
diff --git a/geotrek/trekking/tests/data/file_bad_encoding.kml b/geotrek/trekking/tests/data/file_bad_encoding.kml
new file mode 100644
index 0000000000..d9c0a67b4b
--- /dev/null
+++ b/geotrek/trekking/tests/data/file_bad_encoding.kml
@@ -0,0 +1,8 @@
+
+
+
+
+ Testé dans le cadre des parseurs donc les caractères: & à @ é ê è doivent être bien affichés
+
+
+
\ No newline at end of file
diff --git a/geotrek/trekking/tests/data/file_good_encoding.kml b/geotrek/trekking/tests/data/file_good_encoding.kml
new file mode 100644
index 0000000000..cde1d55c24
--- /dev/null
+++ b/geotrek/trekking/tests/data/file_good_encoding.kml
@@ -0,0 +1,8 @@
+
+
+
+
+ Testé dans le cadre des parseurs donc les caractères: & à @ é ê è doivent être bien affichés
+
+
+
\ No newline at end of file
diff --git a/geotrek/trekking/tests/test_parsers.py b/geotrek/trekking/tests/test_parsers.py
index efe2ef7aff..0e02a38227 100644
--- a/geotrek/trekking/tests/test_parsers.py
+++ b/geotrek/trekking/tests/test_parsers.py
@@ -7,6 +7,7 @@
from unittest import skipIf
from unittest.mock import Mock
from urllib.parse import urlparse
+from shutil import copy as copyfile
from django.conf import settings
from django.contrib.gis.geos import Point, LineString, MultiLineString, WKTWriter
@@ -16,6 +17,7 @@
from django.test.utils import override_settings
from geotrek.common.utils import testdata
+from geotrek.common.utils.file_infos import get_encoding_file
from geotrek.common.models import Theme, FileType, Attachment, Label
from geotrek.common.tests.mixins import GeotrekParserTestMixin
from geotrek.core.tests.factories import PathFactory
@@ -1148,6 +1150,40 @@ def test_trek_illustration_is_not_imported_on_missing_file_metadata(self, mocked
self.assertEqual(Attachment.objects.count(), 0)
+class TestApidaeTrekParserConvertEncodingFiles(TestCase):
+    """Check `ApidaeTrekParser._maybe_fix_encoding_to_utf8` on KML fixtures."""
+    data_dir = "geotrek/trekking/tests/data"
+
+    def _copy_fixture(self, fixture_stem):
+        """Copy `<fixture_stem>.kml` to `<fixture_stem>_tmp.kml` and return the copy's path.
+
+        The parser helper rewrites its input in place, so each test works on
+        a throwaway copy that is removed again once the test is over.
+        """
+        import os  # local import keeps this helper self-contained
+
+        file_name = f'{self.data_dir}/{fixture_stem}_tmp.kml'
+        copyfile(f'{self.data_dir}/{fixture_stem}.kml', file_name)
+        self.addCleanup(os.remove, file_name)
+        return file_name
+
+    def test_fix_encoding_to_utf8(self):
+        file_name = self._copy_fixture('file_bad_encoding')
+        self.assertNotEqual(get_encoding_file(file_name), "utf-8")
+
+        new_file_name = ApidaeTrekParser._maybe_fix_encoding_to_utf8(file_name)
+
+        self.assertEqual(get_encoding_file(new_file_name), "utf-8")
+
+    def test_not_fix_encoding_to_utf8(self):
+        file_name = self._copy_fixture('file_good_encoding')
+        self.assertEqual(get_encoding_file(file_name), "utf-8")
+
+        new_file_name = ApidaeTrekParser._maybe_fix_encoding_to_utf8(file_name)
+
+        self.assertEqual(get_encoding_file(new_file_name), "utf-8")
+
+
class TestApidaeTrekThemeParser(ApidaeTrekThemeParser):
url = 'https://example.net/fake/api/'
diff --git a/requirements.txt b/requirements.txt
index a4ed76c50e..1857c6be34 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -51,6 +51,8 @@ cffi==1.16.0
# persistent
# pyvips
# weasyprint
+chardet==5.2.0
+ # via geotrek (setup.py)
charset-normalizer==3.2.0
# via requests
click==8.1.3
diff --git a/setup.py b/setup.py
index 293661e53e..fe7b72f48b 100644
--- a/setup.py
+++ b/setup.py
@@ -34,6 +34,7 @@ def run(self):
install_requires=[
'Django==3.2.*',
'mapentity',
+ 'chardet',
'cairosvg',
'cairocffi',
'env_file',