diff --git a/.env.template b/.env.template index 1e57b4a12d..3c91b3195e 100644 --- a/.env.template +++ b/.env.template @@ -184,4 +184,8 @@ REMOTE_USER_AUTH=0 # EXPORT_FILE_CACHE_DURATION=600 # if you want to do many requests to the FDC API you need to get a (free) API key. Demo key is limited to 30 requests / hour or 50 requests / day -#FDC_API_KEY=DEMO_KEY \ No newline at end of file +#FDC_API_KEY=DEMO_KEY + +# API throttle limits +# set the limit as X requests per second, minute, hour or day +# DRF_THROTTLE_RECIPE_URL_IMPORT=60/hour \ No newline at end of file diff --git a/cookbook/urls.py b/cookbook/urls.py index db8680e5ef..1b1e6548f0 100644 --- a/cookbook/urls.py +++ b/cookbook/urls.py @@ -129,7 +129,7 @@ def extend(self, r): path('api/sync_all/', api.sync_all, name='api_sync'), path('api/log_cooking//', api.log_cooking, name='api_log_cooking'), path('api/plan-ical///', api.get_plan_ical, name='api_get_plan_ical'), - path('api/recipe-from-source/', api.recipe_from_source, name='api_recipe_from_source'), + path('api/recipe-from-source/', api.RecipeUrlImportView.as_view(), name='api_recipe_from_source'), path('api/backup/', api.get_backup, name='api_backup'), path('api/ingredient-from-string/', api.ingredient_from_string, name='api_ingredient_from_string'), path('api/share-link/', api.share_link, name='api_share_link'), diff --git a/cookbook/views/api.py b/cookbook/views/api.py index d97d5f739c..e0f5cffa3b 100644 --- a/cookbook/views/api.py +++ b/cookbook/views/api.py @@ -46,7 +46,7 @@ from rest_framework.parsers import MultiPartParser from rest_framework.renderers import JSONRenderer, TemplateHTMLRenderer from rest_framework.response import Response -from rest_framework.throttling import AnonRateThrottle +from rest_framework.throttling import AnonRateThrottle, UserRateThrottle from rest_framework.views import APIView from rest_framework.viewsets import ViewSetMixin from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow @@ -104,7 +104,7 @@ 
UserSerializer, UserSpaceSerializer, ViewLogSerializer) from cookbook.views.import_export import get_integration from recipes import settings -from recipes.settings import FDC_API_KEY +from recipes.settings import FDC_API_KEY, DRF_THROTTLE_RECIPE_URL_IMPORT class StandardFilterMixin(ViewSetMixin): @@ -1298,6 +1298,10 @@ class AuthTokenThrottle(AnonRateThrottle): rate = '10/day' +class RecipeImportThrottle(UserRateThrottle): + rate = DRF_THROTTLE_RECIPE_URL_IMPORT + + class CustomAuthToken(ObtainAuthToken): throttle_classes = [AuthTokenThrottle] @@ -1323,114 +1327,114 @@ def post(self, request, *args, **kwargs): }) -@api_view(['POST']) -# @schema(AutoSchema()) #TODO add proper schema -@permission_classes([CustomIsUser & CustomTokenHasReadWriteScope]) -# TODO add rate limiting -def recipe_from_source(request): - """ - function to retrieve a recipe from a given url or source string - :param request: standard request with additional post parameters - - url: url to use for importing recipe - - data: if no url is given recipe is imported from provided source data - - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes - :return: JsonResponse containing the parsed json and images - """ - scrape = None - serializer = RecipeFromSourceSerializer(data=request.data) - if serializer.is_valid(): - - if (b_pk := serializer.validated_data.get('bookmarklet', None)) and ( - bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()): - serializer.validated_data['url'] = bookmarklet.url - serializer.validated_data['data'] = bookmarklet.html - bookmarklet.delete() - - url = serializer.validated_data.get('url', None) - data = unquote(serializer.validated_data.get('data', None)) - if not url and not data: - return Response({ - 'error': True, - 'msg': _('Nothing to do.') - }, status=status.HTTP_400_BAD_REQUEST) - - elif url and not data: - if re.match('^(https?://)?(www\\.youtube\\.com|youtu\\.be)/.+$', url): - if validators.url(url, 
public=True): - return Response({ - 'recipe_json': get_from_youtube_scraper(url, request), - 'recipe_images': [], - }, status=status.HTTP_200_OK) - if re.match( - '^(.)*/view/recipe/[0-9]+/[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$', - url): - recipe_json = requests.get( - url.replace('/view/recipe/', '/api/recipe/').replace(re.split('/view/recipe/[0-9]+', url)[1], - '') + '?share=' + - re.split('/view/recipe/[0-9]+', url)[1].replace('/', '')).json() - recipe_json = clean_dict(recipe_json, 'id') - serialized_recipe = RecipeExportSerializer(data=recipe_json, context={'request': request}) - if serialized_recipe.is_valid(): - recipe = serialized_recipe.save() - if validators.url(recipe_json['image'], public=True): - recipe.image = File(handle_image(request, - File(io.BytesIO(requests.get(recipe_json['image']).content), - name='image'), - filetype=pathlib.Path(recipe_json['image']).suffix), - name=f'{uuid.uuid4()}_{recipe.pk}{pathlib.Path(recipe_json["image"]).suffix}') - recipe.save() - return Response({ - 'link': request.build_absolute_uri(reverse('view_recipe', args={recipe.pk})) - }, status=status.HTTP_201_CREATED) - else: - try: - if validators.url(url, public=True): - scrape = scrape_me(url_path=url, wild_mode=True) +class RecipeUrlImportView(ObtainAuthToken): + throttle_classes = [RecipeImportThrottle] + permission_classes = [CustomIsUser & CustomTokenHasReadWriteScope] - else: + def post(self, request, *args, **kwargs): + """ + function to retrieve a recipe from a given url or source string + :param request: standard request with additional post parameters + - url: url to use for importing recipe + - data: if no url is given recipe is imported from provided source data + - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes + :return: JsonResponse containing the parsed json and images + """ + scrape = None + serializer = RecipeFromSourceSerializer(data=request.data) + if serializer.is_valid(): 
+ + if (b_pk := serializer.validated_data.get('bookmarklet', None)) and ( + bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()): + serializer.validated_data['url'] = bookmarklet.url + serializer.validated_data['data'] = bookmarklet.html + bookmarklet.delete() + + url = serializer.validated_data.get('url', None) + data = unquote(serializer.validated_data.get('data', None)) + if not url and not data: + return Response({ + 'error': True, + 'msg': _('Nothing to do.') + }, status=status.HTTP_400_BAD_REQUEST) + + elif url and not data: + if re.match('^(https?://)?(www\\.youtube\\.com|youtu\\.be)/.+$', url): + if validators.url(url, public=True): + return Response({ + 'recipe_json': get_from_youtube_scraper(url, request), + 'recipe_images': [], + }, status=status.HTTP_200_OK) + if re.match( + '^(.)*/view/recipe/[0-9]+/[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$', + url): + recipe_json = requests.get( + url.replace('/view/recipe/', '/api/recipe/').replace(re.split('/view/recipe/[0-9]+', url)[1], + '') + '?share=' + + re.split('/view/recipe/[0-9]+', url)[1].replace('/', '')).json() + recipe_json = clean_dict(recipe_json, 'id') + serialized_recipe = RecipeExportSerializer(data=recipe_json, context={'request': request}) + if serialized_recipe.is_valid(): + recipe = serialized_recipe.save() + if validators.url(recipe_json['image'], public=True): + recipe.image = File(handle_image(request, + File(io.BytesIO(requests.get(recipe_json['image']).content), + name='image'), + filetype=pathlib.Path(recipe_json['image']).suffix), + name=f'{uuid.uuid4()}_{recipe.pk}{pathlib.Path(recipe_json["image"]).suffix}') + recipe.save() + return Response({ + 'link': request.build_absolute_uri(reverse('view_recipe', args={recipe.pk})) + }, status=status.HTTP_201_CREATED) + else: + try: + if validators.url(url, public=True): + scrape = scrape_me(url_path=url, wild_mode=True) + + else: + return Response({ + 'error': True, + 'msg': _('Invalid Url') + }, 
status=status.HTTP_400_BAD_REQUEST) + except NoSchemaFoundInWildMode: + pass + except requests.exceptions.ConnectionError: + return Response({ + 'error': True, + 'msg': _('Connection Refused.') + }, status=status.HTTP_400_BAD_REQUEST) + except requests.exceptions.MissingSchema: return Response({ 'error': True, - 'msg': _('Invalid Url') + 'msg': _('Bad URL Schema.') }, status=status.HTTP_400_BAD_REQUEST) - except NoSchemaFoundInWildMode: + else: + try: + data_json = json.loads(data) + if '@context' not in data_json: + data_json['@context'] = 'https://schema.org' + if '@type' not in data_json: + data_json['@type'] = 'Recipe' + data = "" + except JSONDecodeError: pass - except requests.exceptions.ConnectionError: - return Response({ - 'error': True, - 'msg': _('Connection Refused.') - }, status=status.HTTP_400_BAD_REQUEST) - except requests.exceptions.MissingSchema: - return Response({ - 'error': True, - 'msg': _('Bad URL Schema.') - }, status=status.HTTP_400_BAD_REQUEST) - else: - try: - data_json = json.loads(data) - if '@context' not in data_json: - data_json['@context'] = 'https://schema.org' - if '@type' not in data_json: - data_json['@type'] = 'Recipe' - data = "" - except JSONDecodeError: - pass - scrape = text_scraper(text=data, url=url) - if not url and (found_url := scrape.schema.data.get('url', None)): - scrape = text_scraper(text=data, url=found_url) + scrape = text_scraper(text=data, url=url) + if not url and (found_url := scrape.schema.data.get('url', None)): + scrape = text_scraper(text=data, url=found_url) - if scrape: - return Response({ - 'recipe_json': helper.get_from_scraper(scrape, request), - 'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))), - }, status=status.HTTP_200_OK) + if scrape: + return Response({ + 'recipe_json': helper.get_from_scraper(scrape, request), + 'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))), + }, status=status.HTTP_200_OK) + else: + return Response({ + 'error': True, + 
'msg': _('No usable data could be found.') + }, status=status.HTTP_400_BAD_REQUEST) else: - return Response({ - 'error': True, - 'msg': _('No usable data could be found.') - }, status=status.HTTP_400_BAD_REQUEST) - else: - return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) + return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) @api_view(['GET']) diff --git a/recipes/settings.py b/recipes/settings.py index 97276b5323..2f5b95e7e6 100644 --- a/recipes/settings.py +++ b/recipes/settings.py @@ -96,6 +96,8 @@ ACCOUNT_SIGNUP_FORM_CLASS = 'cookbook.forms.AllAuthSignupForm' +DRF_THROTTLE_RECIPE_URL_IMPORT = os.getenv('DRF_THROTTLE_RECIPE_URL_IMPORT', '60/hour') + TERMS_URL = os.getenv('TERMS_URL', '') PRIVACY_URL = os.getenv('PRIVACY_URL', '') IMPRINT_URL = os.getenv('IMPRINT_URL', '')