added ability to set rate limiting for url import

TandoorRecipes · Dec 16, 2023 · dd3e91e · dd3e91e
1 parent 76b8489
commit dd3e91e
Show file tree

Hide file tree

Showing 4 changed files with 114 additions and 104 deletions.
diff --git a/.env.template b/.env.template
@@ -184,4 +184,8 @@ REMOTE_USER_AUTH=0
 # EXPORT_FILE_CACHE_DURATION=600
 
 # if you want to do many requests to the FDC API you need to get a (free) API key. Demo key is limited to 30 requests / hour or 50 requests / day
-#FDC_API_KEY=DEMO_KEY
+#FDC_API_KEY=DEMO_KEY
+
+# API throttle limits
+# format is <number>/<period>, where <period> is one of second, minute, hour or day
+# DRF_THROTTLE_RECIPE_URL_IMPORT=60/hour
diff --git a/cookbook/urls.py b/cookbook/urls.py
@@ -129,7 +129,7 @@ def extend(self, r):
     path('api/sync_all/', api.sync_all, name='api_sync'),
     path('api/log_cooking/<int:recipe_id>/', api.log_cooking, name='api_log_cooking'),
     path('api/plan-ical/<slug:from_date>/<slug:to_date>/', api.get_plan_ical, name='api_get_plan_ical'),
-    path('api/recipe-from-source/', api.recipe_from_source, name='api_recipe_from_source'),
+    path('api/recipe-from-source/', api.RecipeUrlImportView.as_view(), name='api_recipe_from_source'),
     path('api/backup/', api.get_backup, name='api_backup'),
     path('api/ingredient-from-string/', api.ingredient_from_string, name='api_ingredient_from_string'),
     path('api/share-link/<int:pk>', api.share_link, name='api_share_link'),

diff --git a/cookbook/views/api.py b/cookbook/views/api.py
@@ -46,7 +46,7 @@
 from rest_framework.parsers import MultiPartParser
 from rest_framework.renderers import JSONRenderer, TemplateHTMLRenderer
 from rest_framework.response import Response
-from rest_framework.throttling import AnonRateThrottle
+from rest_framework.throttling import AnonRateThrottle, UserRateThrottle
 from rest_framework.views import APIView
 from rest_framework.viewsets import ViewSetMixin
 from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow
@@ -104,7 +104,7 @@
                                  UserSerializer, UserSpaceSerializer, ViewLogSerializer)
 from cookbook.views.import_export import get_integration
 from recipes import settings
-from recipes.settings import FDC_API_KEY
+from recipes.settings import FDC_API_KEY, DRF_THROTTLE_RECIPE_URL_IMPORT
 
 
 class StandardFilterMixin(ViewSetMixin):
@@ -1298,6 +1298,10 @@ class AuthTokenThrottle(AnonRateThrottle):
     rate = '10/day'
 
 
+class RecipeImportThrottle(UserRateThrottle):
+    rate = DRF_THROTTLE_RECIPE_URL_IMPORT  # rate string imported from recipes.settings; NOTE(review): no custom `scope` is set here - confirm sharing UserRateThrottle's default scope is intended
+
+
 class CustomAuthToken(ObtainAuthToken):
     throttle_classes = [AuthTokenThrottle]
 
@@ -1323,114 +1327,114 @@ def post(self, request, *args, **kwargs):
         })
 
 
-@api_view(['POST'])
-# @schema(AutoSchema()) #TODO add proper schema
-@permission_classes([CustomIsUser & CustomTokenHasReadWriteScope])
-# TODO add rate limiting
-def recipe_from_source(request):
-    """
-    function to retrieve a recipe from a given url or source string
-    :param request: standard request with additional post parameters
-            - url: url to use for importing recipe
-            - data: if no url is given recipe is imported from provided source data
-            - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
-    :return: JsonResponse containing the parsed json and images
-    """
-    scrape = None
-    serializer = RecipeFromSourceSerializer(data=request.data)
-    if serializer.is_valid():
-
-        if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (
-                bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
-            serializer.validated_data['url'] = bookmarklet.url
-            serializer.validated_data['data'] = bookmarklet.html
-            bookmarklet.delete()
-
-        url = serializer.validated_data.get('url', None)
-        data = unquote(serializer.validated_data.get('data', None))
-        if not url and not data:
-            return Response({
-                'error': True,
-                'msg': _('Nothing to do.')
-            }, status=status.HTTP_400_BAD_REQUEST)
-
-        elif url and not data:
-            if re.match('^(https?://)?(www\\.youtube\\.com|youtu\\.be)/.+$', url):
-                if validators.url(url, public=True):
-                    return Response({
-                        'recipe_json': get_from_youtube_scraper(url, request),
-                        'recipe_images': [],
-                    }, status=status.HTTP_200_OK)
-            if re.match(
-                    '^(.)*/view/recipe/[0-9]+/[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
-                    url):
-                recipe_json = requests.get(
-                    url.replace('/view/recipe/', '/api/recipe/').replace(re.split('/view/recipe/[0-9]+', url)[1],
-                                                                         '') + '?share=' +
-                    re.split('/view/recipe/[0-9]+', url)[1].replace('/', '')).json()
-                recipe_json = clean_dict(recipe_json, 'id')
-                serialized_recipe = RecipeExportSerializer(data=recipe_json, context={'request': request})
-                if serialized_recipe.is_valid():
-                    recipe = serialized_recipe.save()
-                    if validators.url(recipe_json['image'], public=True):
-                        recipe.image = File(handle_image(request,
-                                                         File(io.BytesIO(requests.get(recipe_json['image']).content),
-                                                              name='image'),
-                                                         filetype=pathlib.Path(recipe_json['image']).suffix),
-                                            name=f'{uuid.uuid4()}_{recipe.pk}{pathlib.Path(recipe_json["image"]).suffix}')
-                    recipe.save()
-                    return Response({
-                        'link': request.build_absolute_uri(reverse('view_recipe', args={recipe.pk}))
-                    }, status=status.HTTP_201_CREATED)
-            else:
-                try:
-                    if validators.url(url, public=True):
-                        scrape = scrape_me(url_path=url, wild_mode=True)
+class RecipeUrlImportView(APIView):
+    throttle_classes = [RecipeImportThrottle]
+    permission_classes = [CustomIsUser & CustomTokenHasReadWriteScope]
 
-                    else:
+    def post(self, request, *args, **kwargs):
+        """
+        Import a recipe from a given url or source string (throttled by RecipeImportThrottle).
+        :param request: standard request with additional post parameters
+                - url: url to use for importing recipe
+                - data: if no url is given recipe is imported from provided source data
+                - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
+        :return: Response with 'recipe_json' and 'recipe_images', a 'link' to an imported shared recipe, or an error
+        """
+        scrape = None
+        serializer = RecipeFromSourceSerializer(data=request.data)
+        if serializer.is_valid():
+
+            if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (
+                    bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
+                serializer.validated_data['url'] = bookmarklet.url
+                serializer.validated_data['data'] = bookmarklet.html
+                bookmarklet.delete()  # the stored bookmarklet import is consumed on first use
+
+            url = serializer.validated_data.get('url', None)
+            data = unquote(serializer.validated_data.get('data') or '')  # unquote() raises TypeError on None
+            if not url and not data:
+                return Response({
+                    'error': True,
+                    'msg': _('Nothing to do.')
+                }, status=status.HTTP_400_BAD_REQUEST)
+
+            elif url and not data:
+                if re.match('^(https?://)?(www\\.youtube\\.com|youtu\\.be)/.+$', url):
+                    if validators.url(url, public=True):
+                        return Response({
+                            'recipe_json': get_from_youtube_scraper(url, request),
+                            'recipe_images': [],
+                        }, status=status.HTTP_200_OK)
+                if re.match(
+                        '^(.)*/view/recipe/[0-9]+/[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
+                        url):  # NOTE(review): this url is fetched below without the validators.url(public=True) check used by the other branches
+                    recipe_json = requests.get(
+                        url.replace('/view/recipe/', '/api/recipe/').replace(re.split('/view/recipe/[0-9]+', url)[1],
+                                                                             '') + '?share=' +
+                        re.split('/view/recipe/[0-9]+', url)[1].replace('/', '')).json()
+                    recipe_json = clean_dict(recipe_json, 'id')
+                    serialized_recipe = RecipeExportSerializer(data=recipe_json, context={'request': request})
+                    if serialized_recipe.is_valid():
+                        recipe = serialized_recipe.save()
+                        if validators.url(recipe_json['image'], public=True):
+                            recipe.image = File(handle_image(request,
+                                                             File(io.BytesIO(requests.get(recipe_json['image']).content),
+                                                                  name='image'),
+                                                             filetype=pathlib.Path(recipe_json['image']).suffix),
+                                                name=f'{uuid.uuid4()}_{recipe.pk}{pathlib.Path(recipe_json["image"]).suffix}')
+                        recipe.save()
+                        return Response({
+                            'link': request.build_absolute_uri(reverse('view_recipe', args=[recipe.pk]))
+                        }, status=status.HTTP_201_CREATED)
+                else:
+                    try:
+                        if validators.url(url, public=True):
+                            scrape = scrape_me(url_path=url, wild_mode=True)
+
+                        else:
+                            return Response({
+                                'error': True,
+                                'msg': _('Invalid Url')
+                            }, status=status.HTTP_400_BAD_REQUEST)
+                    except NoSchemaFoundInWildMode:
+                        pass  # scrape stays None, so the 'No usable data could be found.' response below is returned
+                    except requests.exceptions.ConnectionError:
+                        return Response({
+                            'error': True,
+                            'msg': _('Connection Refused.')
+                        }, status=status.HTTP_400_BAD_REQUEST)
+                    except requests.exceptions.MissingSchema:
                         return Response({
                             'error': True,
-                            'msg': _('Invalid Url')
+                            'msg': _('Bad URL Schema.')
                         }, status=status.HTTP_400_BAD_REQUEST)
-                except NoSchemaFoundInWildMode:
+            else:
+                try:
+                    data_json = json.loads(data)
+                    if '@context' not in data_json:
+                        data_json['@context'] = 'https://schema.org'
+                    if '@type' not in data_json:
+                        data_json['@type'] = 'Recipe'
+                    data = "<script type='application/ld+json'>" + json.dumps(data_json) + "</script>"
+                except JSONDecodeError:
                     pass
-                except requests.exceptions.ConnectionError:
-                    return Response({
-                        'error': True,
-                        'msg': _('Connection Refused.')
-                    }, status=status.HTTP_400_BAD_REQUEST)
-                except requests.exceptions.MissingSchema:
-                    return Response({
-                        'error': True,
-                        'msg': _('Bad URL Schema.')
-                    }, status=status.HTTP_400_BAD_REQUEST)
-        else:
-            try:
-                data_json = json.loads(data)
-                if '@context' not in data_json:
-                    data_json['@context'] = 'https://schema.org'
-                if '@type' not in data_json:
-                    data_json['@type'] = 'Recipe'
-                data = "<script type='application/ld+json'>" + json.dumps(data_json) + "</script>"
-            except JSONDecodeError:
-                pass
-            scrape = text_scraper(text=data, url=url)
-            if not url and (found_url := scrape.schema.data.get('url', None)):
-                scrape = text_scraper(text=data, url=found_url)
+                scrape = text_scraper(text=data, url=url)
+                if not url and (found_url := scrape.schema.data.get('url', None)):
+                    scrape = text_scraper(text=data, url=found_url)
 
-        if scrape:
-            return Response({
-                'recipe_json': helper.get_from_scraper(scrape, request),
-                'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))),
-            }, status=status.HTTP_200_OK)
+            if scrape:
+                return Response({
+                    'recipe_json': helper.get_from_scraper(scrape, request),
+                    'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))),
+                }, status=status.HTTP_200_OK)
 
+            else:
+                return Response({
+                    'error': True,
+                    'msg': _('No usable data could be found.')
+                }, status=status.HTTP_400_BAD_REQUEST)
         else:
-            return Response({
-                'error': True,
-                'msg': _('No usable data could be found.')
-            }, status=status.HTTP_400_BAD_REQUEST)
-    else:
-        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
+            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
 
 
 @api_view(['GET'])

diff --git a/recipes/settings.py b/recipes/settings.py
@@ -96,6 +96,8 @@
 
 ACCOUNT_SIGNUP_FORM_CLASS = 'cookbook.forms.AllAuthSignupForm'
 
+DRF_THROTTLE_RECIPE_URL_IMPORT = os.getenv('DRF_THROTTLE_RECIPE_URL_IMPORT', '60/hour')  # throttle rate for the recipe url import API, read from the environment with a 60/hour fallback
+
 TERMS_URL = os.getenv('TERMS_URL', '')
 PRIVACY_URL = os.getenv('PRIVACY_URL', '')
 IMPRINT_URL = os.getenv('IMPRINT_URL', '')