Skip to content

Commit

Permalink
added ability to set rate limiting for url import
Browse files Browse the repository at this point in the history
  • Loading branch information
vabene1111 committed Dec 16, 2023
1 parent 76b8489 commit dd3e91e
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 104 deletions.
6 changes: 5 additions & 1 deletion .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,8 @@ REMOTE_USER_AUTH=0
# EXPORT_FILE_CACHE_DURATION=600

# if you want to do many requests to the FDC API you need to get a (free) API key. Demo key is limited to 30 requests / hour or 50 requests / day
#FDC_API_KEY=DEMO_KEY
#FDC_API_KEY=DEMO_KEY

# API throttle limits
# you may use X per second, minute, hour or day
# DRF_THROTTLE_RECIPE_URL_IMPORT=60/hour
2 changes: 1 addition & 1 deletion cookbook/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def extend(self, r):
path('api/sync_all/', api.sync_all, name='api_sync'),
path('api/log_cooking/<int:recipe_id>/', api.log_cooking, name='api_log_cooking'),
path('api/plan-ical/<slug:from_date>/<slug:to_date>/', api.get_plan_ical, name='api_get_plan_ical'),
path('api/recipe-from-source/', api.recipe_from_source, name='api_recipe_from_source'),
path('api/recipe-from-source/', api.RecipeUrlImportView.as_view(), name='api_recipe_from_source'),
path('api/backup/', api.get_backup, name='api_backup'),
path('api/ingredient-from-string/', api.ingredient_from_string, name='api_ingredient_from_string'),
path('api/share-link/<int:pk>', api.share_link, name='api_share_link'),
Expand Down
208 changes: 106 additions & 102 deletions cookbook/views/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
from rest_framework.parsers import MultiPartParser
from rest_framework.renderers import JSONRenderer, TemplateHTMLRenderer
from rest_framework.response import Response
from rest_framework.throttling import AnonRateThrottle
from rest_framework.throttling import AnonRateThrottle, UserRateThrottle
from rest_framework.views import APIView
from rest_framework.viewsets import ViewSetMixin
from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow
Expand Down Expand Up @@ -104,7 +104,7 @@
UserSerializer, UserSpaceSerializer, ViewLogSerializer)
from cookbook.views.import_export import get_integration
from recipes import settings
from recipes.settings import FDC_API_KEY
from recipes.settings import FDC_API_KEY, DRF_THROTTLE_RECIPE_URL_IMPORT


class StandardFilterMixin(ViewSetMixin):
Expand Down Expand Up @@ -1298,6 +1298,10 @@ class AuthTokenThrottle(AnonRateThrottle):
rate = '10/day'


class RecipeImportThrottle(UserRateThrottle):
rate = DRF_THROTTLE_RECIPE_URL_IMPORT


class CustomAuthToken(ObtainAuthToken):
throttle_classes = [AuthTokenThrottle]

Expand All @@ -1323,114 +1327,114 @@ def post(self, request, *args, **kwargs):
})


@api_view(['POST'])
# @schema(AutoSchema()) #TODO add proper schema
@permission_classes([CustomIsUser & CustomTokenHasReadWriteScope])
# TODO add rate limiting
def recipe_from_source(request):
"""
function to retrieve a recipe from a given url or source string
:param request: standard request with additional post parameters
- url: url to use for importing recipe
- data: if no url is given recipe is imported from provided source data
- (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
:return: JsonResponse containing the parsed json and images
"""
scrape = None
serializer = RecipeFromSourceSerializer(data=request.data)
if serializer.is_valid():

if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (
bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
serializer.validated_data['url'] = bookmarklet.url
serializer.validated_data['data'] = bookmarklet.html
bookmarklet.delete()

url = serializer.validated_data.get('url', None)
data = unquote(serializer.validated_data.get('data', None))
if not url and not data:
return Response({
'error': True,
'msg': _('Nothing to do.')
}, status=status.HTTP_400_BAD_REQUEST)

elif url and not data:
if re.match('^(https?://)?(www\\.youtube\\.com|youtu\\.be)/.+$', url):
if validators.url(url, public=True):
return Response({
'recipe_json': get_from_youtube_scraper(url, request),
'recipe_images': [],
}, status=status.HTTP_200_OK)
if re.match(
'^(.)*/view/recipe/[0-9]+/[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
url):
recipe_json = requests.get(
url.replace('/view/recipe/', '/api/recipe/').replace(re.split('/view/recipe/[0-9]+', url)[1],
'') + '?share=' +
re.split('/view/recipe/[0-9]+', url)[1].replace('/', '')).json()
recipe_json = clean_dict(recipe_json, 'id')
serialized_recipe = RecipeExportSerializer(data=recipe_json, context={'request': request})
if serialized_recipe.is_valid():
recipe = serialized_recipe.save()
if validators.url(recipe_json['image'], public=True):
recipe.image = File(handle_image(request,
File(io.BytesIO(requests.get(recipe_json['image']).content),
name='image'),
filetype=pathlib.Path(recipe_json['image']).suffix),
name=f'{uuid.uuid4()}_{recipe.pk}{pathlib.Path(recipe_json["image"]).suffix}')
recipe.save()
return Response({
'link': request.build_absolute_uri(reverse('view_recipe', args={recipe.pk}))
}, status=status.HTTP_201_CREATED)
else:
try:
if validators.url(url, public=True):
scrape = scrape_me(url_path=url, wild_mode=True)
class RecipeUrlImportView(ObtainAuthToken):
throttle_classes = [RecipeImportThrottle]
permission_classes = [CustomIsUser & CustomTokenHasReadWriteScope]

else:
def post(self, request, *args, **kwargs):
"""
function to retrieve a recipe from a given url or source string
:param request: standard request with additional post parameters
- url: url to use for importing recipe
- data: if no url is given recipe is imported from provided source data
- (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
:return: JsonResponse containing the parsed json and images
"""
scrape = None
serializer = RecipeFromSourceSerializer(data=request.data)
if serializer.is_valid():

if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (
bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
serializer.validated_data['url'] = bookmarklet.url
serializer.validated_data['data'] = bookmarklet.html
bookmarklet.delete()

url = serializer.validated_data.get('url', None)
data = unquote(serializer.validated_data.get('data', None))
if not url and not data:
return Response({
'error': True,
'msg': _('Nothing to do.')
}, status=status.HTTP_400_BAD_REQUEST)

elif url and not data:
if re.match('^(https?://)?(www\\.youtube\\.com|youtu\\.be)/.+$', url):
if validators.url(url, public=True):
return Response({
'recipe_json': get_from_youtube_scraper(url, request),
'recipe_images': [],
}, status=status.HTTP_200_OK)
if re.match(
'^(.)*/view/recipe/[0-9]+/[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
url):
recipe_json = requests.get(
url.replace('/view/recipe/', '/api/recipe/').replace(re.split('/view/recipe/[0-9]+', url)[1],
'') + '?share=' +
re.split('/view/recipe/[0-9]+', url)[1].replace('/', '')).json()
recipe_json = clean_dict(recipe_json, 'id')
serialized_recipe = RecipeExportSerializer(data=recipe_json, context={'request': request})
if serialized_recipe.is_valid():
recipe = serialized_recipe.save()
if validators.url(recipe_json['image'], public=True):
recipe.image = File(handle_image(request,
File(io.BytesIO(requests.get(recipe_json['image']).content),
name='image'),
filetype=pathlib.Path(recipe_json['image']).suffix),
name=f'{uuid.uuid4()}_{recipe.pk}{pathlib.Path(recipe_json["image"]).suffix}')
recipe.save()
return Response({
'link': request.build_absolute_uri(reverse('view_recipe', args={recipe.pk}))
}, status=status.HTTP_201_CREATED)
else:
try:
if validators.url(url, public=True):
scrape = scrape_me(url_path=url, wild_mode=True)

else:
return Response({
'error': True,
'msg': _('Invalid Url')
}, status=status.HTTP_400_BAD_REQUEST)
except NoSchemaFoundInWildMode:
pass
except requests.exceptions.ConnectionError:
return Response({
'error': True,
'msg': _('Connection Refused.')
}, status=status.HTTP_400_BAD_REQUEST)
except requests.exceptions.MissingSchema:
return Response({
'error': True,
'msg': _('Invalid Url')
'msg': _('Bad URL Schema.')
}, status=status.HTTP_400_BAD_REQUEST)
except NoSchemaFoundInWildMode:
else:
try:
data_json = json.loads(data)
if '@context' not in data_json:
data_json['@context'] = 'https://schema.org'
if '@type' not in data_json:
data_json['@type'] = 'Recipe'
data = "<script type='application/ld+json'>" + json.dumps(data_json) + "</script>"
except JSONDecodeError:
pass
except requests.exceptions.ConnectionError:
return Response({
'error': True,
'msg': _('Connection Refused.')
}, status=status.HTTP_400_BAD_REQUEST)
except requests.exceptions.MissingSchema:
return Response({
'error': True,
'msg': _('Bad URL Schema.')
}, status=status.HTTP_400_BAD_REQUEST)
else:
try:
data_json = json.loads(data)
if '@context' not in data_json:
data_json['@context'] = 'https://schema.org'
if '@type' not in data_json:
data_json['@type'] = 'Recipe'
data = "<script type='application/ld+json'>" + json.dumps(data_json) + "</script>"
except JSONDecodeError:
pass
scrape = text_scraper(text=data, url=url)
if not url and (found_url := scrape.schema.data.get('url', None)):
scrape = text_scraper(text=data, url=found_url)
scrape = text_scraper(text=data, url=url)
if not url and (found_url := scrape.schema.data.get('url', None)):
scrape = text_scraper(text=data, url=found_url)

if scrape:
return Response({
'recipe_json': helper.get_from_scraper(scrape, request),
'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))),
}, status=status.HTTP_200_OK)
if scrape:
return Response({
'recipe_json': helper.get_from_scraper(scrape, request),
'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))),
}, status=status.HTTP_200_OK)

else:
return Response({
'error': True,
'msg': _('No usable data could be found.')
}, status=status.HTTP_400_BAD_REQUEST)
else:
return Response({
'error': True,
'msg': _('No usable data could be found.')
}, status=status.HTTP_400_BAD_REQUEST)
else:
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)


@api_view(['GET'])
Expand Down
2 changes: 2 additions & 0 deletions recipes/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@

ACCOUNT_SIGNUP_FORM_CLASS = 'cookbook.forms.AllAuthSignupForm'

DRF_THROTTLE_RECIPE_URL_IMPORT = os.getenv('DRF_THROTTLE_RECIPE_URL_IMPORT', '60/hour')

TERMS_URL = os.getenv('TERMS_URL', '')
PRIVACY_URL = os.getenv('PRIVACY_URL', '')
IMPRINT_URL = os.getenv('IMPRINT_URL', '')
Expand Down

0 comments on commit dd3e91e

Please sign in to comment.