Skip to content

Commit

Permalink
[Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creatin…
Browse files Browse the repository at this point in the history
…g tags (GeoNode#10906)

* [Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creating tags

* [Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creating tags

* [Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creating tags

* [Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creating tags

* [Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creating tags add tests

* [Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creating tags

* [Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creating tags

* [Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creating tags

* [Fixes GeoNode#10859] Deadlock in HierarchicalTagManager when creating tags

---------

Co-authored-by: Giovanni Allegri <[email protected]>
  • Loading branch information
mattiagiupponi and giohappy authored Apr 21, 2023
1 parent 6448ad4 commit 35b1eb1
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 35 deletions.
84 changes: 49 additions & 35 deletions geonode/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from django.utils.html import escape
from django.utils.timezone import now
from django.db.models import Q, signals
from django.db.utils import IntegrityError, OperationalError
from django.contrib.auth.models import Group
from django.core.files.base import ContentFile
from django.contrib.auth import get_user_model
Expand Down Expand Up @@ -397,49 +398,62 @@ def add(self, *tags, through_defaults=None, tag_kwargs=None):
tag_objs = set(tags) - str_tags
# If str_tags has 0 elements Django actually optimizes that to not do a
# query. Malcolm is very smart.
"""
To avoid concurrency with the keyword in case of a massive upload.
With the transaction block and the select_for_update,
we can easily handle the concurrency.
DOC: https://docs.djangoproject.com/en/3.2/ref/models/querysets/#select-for-update
"""
existing = self.through.tag_model().objects.select_for_update().filter(name__in=str_tags, **tag_kwargs)
with transaction.atomic():
try:
existing = self.through.tag_model().objects.filter(name__in=str_tags, **tag_kwargs).all()
tag_objs.update(existing)
new_ids = set()
_new_keyword = str_tags - set(t.name for t in existing)
for new_tag in list(_new_keyword):
_new_keywords = str_tags - set(t.name for t in existing)
for new_tag in list(_new_keywords):
new_tag = escape(new_tag)
try:
new_tag_obj = HierarchicalKeyword.add_root(name=new_tag)
new_tag_obj = None
with transaction.atomic():
try:
new_tag_obj = HierarchicalKeyword.add_root(name=new_tag)
except Exception as e:
logger.exception(e)
# HierarchicalKeyword.add_root didn't return, probably the keyword already exists
if not new_tag_obj:
new_tag_obj = HierarchicalKeyword.objects.filter(name=new_tag)
if new_tag_obj.exists():
new_tag_obj.first()
if new_tag_obj:
tag_objs.add(new_tag_obj)
new_ids.add(new_tag_obj.id)
else:
# Something has gone seriously wrong and we cannot assign the tag to the resource
logger.error(f"Error during the keyword creation for keyword: {new_tag}")

signals.m2m_changed.send(
sender=self.through,
action="pre_add",
instance=self.instance,
reverse=False,
model=self.through.tag_model(),
pk_set=new_ids,
)

for tag in tag_objs:
try:
self.through.objects.get_or_create(tag=tag, **self._lookup_kwargs(), defaults=through_defaults)
except Exception as e:
logger.exception(e)

signals.m2m_changed.send(
sender=self.through,
action="pre_add",
instance=self.instance,
reverse=False,
model=self.through.tag_model(),
pk_set=new_ids,
)

for tag in tag_objs:
try:
self.through.objects.get_or_create(tag=tag, **self._lookup_kwargs(), defaults=through_defaults)
except Exception as e:
logger.exception(e)

signals.m2m_changed.send(
sender=self.through,
action="post_add",
instance=self.instance,
reverse=False,
model=self.through.tag_model(),
pk_set=new_ids,
)
signals.m2m_changed.send(
sender=self.through,
action="post_add",
instance=self.instance,
reverse=False,
model=self.through.tag_model(),
pk_set=new_ids,
)
except IntegrityError as e:
logger.warning("The keyword provided already exists", exc_info=e)
except OperationalError as e:
logger.warning(
"An error has occured with the DB connection. Please try to re-add the keywords again", exc_info=e
)
except Exception as e:
raise e


class Thesaurus(models.Model):
Expand Down
37 changes: 37 additions & 0 deletions geonode/base/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
#
#########################################################################

import logging
import os
from django.db.utils import IntegrityError, OperationalError
import requests

from uuid import uuid4
Expand All @@ -40,6 +42,7 @@
from geonode.base.templatetags.base_tags import display_change_perms_button
from geonode.base.utils import OwnerRightsRequestViewUtils
from geonode.base.models import (
HierarchicalKeyword,
ResourceBase,
MenuPlaceholder,
Menu,
Expand Down Expand Up @@ -1087,3 +1090,37 @@ def test_is_thesaurus_available_should_return_empty_queryset_for_non_existing_th
thesaurus = {"title": "Random Thesaurus Title"}
actual = self.sut.is_thesaurus_available(thesaurus, keyword)
self.assertEqual(0, len(actual))


class Test_HierarchicalTagManager(GeoNodeBaseTestSupport):
def setUp(self) -> None:
self.sut = create_single_dataset(name="dataset_for_keyword")
self.keyword = HierarchicalKeyword.objects.create(name="test_kw", slug="test_kw", depth=1)
self.keyword.save()

def tearDown(self) -> None:
self.sut.keywords.remove(self.keyword)

def test_keyword_are_correctly_saved(self):
self.assertFalse(self.sut.keywords.exists())
self.sut.keywords.add(self.keyword)
self.assertTrue(self.sut.keywords.exists())

@patch("django.db.models.query.QuerySet._fetch_all")
def test_keyword_raise_integrity_error(self, keyword_fetch_method):
keyword_fetch_method.side_effect = IntegrityError()
logger = logging.getLogger("geonode.base.models")
with self.assertLogs(logger, level="WARNING") as _log:
self.sut.keywords.add(self.keyword)
self.assertIn("The keyword provided already exists", [x.message for x in _log.records])

@patch("geonode.base.models.HierarchicalKeyword.add_root")
def test_keyword_raise_db_error(self, add_root_mocked):
add_root_mocked.side_effect = OperationalError()
logger = logging.getLogger("geonode.base.models")
with self.assertLogs(logger) as _log:
self.sut.keywords.add("keyword2")
self.assertIn(
"Error during the keyword creation for keyword: keyword2",
[x.message for x in _log.records],
)

0 comments on commit 35b1eb1

Please sign in to comment.