From bdc84df3a310c3906315e77fc2c65585406de544 Mon Sep 17 00:00:00 2001 From: Olwe Samuel Date: Fri, 13 Dec 2024 12:41:58 +0300 Subject: [PATCH 01/11] Rate limit marketo endpoint --- webapp/decorators.py | 50 ++++++++++++++++++++++++++++++++++++++++++++ webapp/views.py | 26 +++++++++++------------ 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index 0addec6753b..8e99f9b143b 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -1,8 +1,12 @@ # Core packages import functools +import json +from datetime import datetime, timedelta +from typing import Callable # Third party packages import flask + from webapp.login import user_info @@ -20,3 +24,49 @@ def is_user_logged_in(*args, **kwargs): return func(*args, **kwargs) return is_user_logged_in + + +def rate_limit_with_backoff(func: Callable): + """ + Decorator to rate limit function calls based on the users' session. + The rate limit restricts users to: + - 1 request every 2 seconds + - 2 request every 4 seconds + - 3 request every 8 seconds + """ + rate_limit_attempt_map = { + 1: timedelta(seconds=2), + 2: timedelta(seconds=4), + 3: timedelta(seconds=8), + } + ATTEMPT_LIMIT = 3 + + @functools.wraps(func) + def rate_limited(*args, **kwargs): + # Get the initial request timestamp, or update the session with the + # timestamp from the most recent successful request + if initial_request := json.loads(flask.session.get(func.__name__)): + # Get the current limit + current_limit = rate_limit_attempt_map.get(initial_request["attempts"]) + + time_since_last_request = datetime.now() - datetime.fromtimestamp( + initial_request["timestamp"] + ) + # Abort if the time is too early for this number of attempts + if time_since_last_request.total_seconds() < current_limit.total_seconds(): + # Increment the number of attempts. 3 is a hard upper limit. + if initial_request["attempts"] < ATTEMPT_LIMIT: + initial_request["attempts"] += 1 + flask.session[func.__name__] = json.dumps( + initial_request["attempts"] + ) + + return flask.abort(429) + + # Set values for a successful request + flask.session[func.__name__] = json.dumps( + {"timestamp": datetime.now().timestamp(), "attempts": 1} + ) + return func(*args, **kwargs) + + return rate_limited diff --git a/webapp/views.py b/webapp/views.py index 8166a926e8b..b9636896c9a 100644 --- a/webapp/views.py +++ b/webapp/views.py @@ -1,35 +1,35 @@ # Standard library import datetime import html +import json import math import os import re -import json +from urllib.parse import quote # Packages import dateutil import feedparser import flask +import jinja2 import talisker.requests import yaml -import jinja2 -from ubuntu_release_info.data import Data -from geolite2 import geolite2 -from requests import Session -from requests.exceptions import HTTPError -from urllib.parse import quote, unquote - -from canonicalwebteam.search.models import get_search_results -from canonicalwebteam.search.views import NoAPIKeyError from bs4 import BeautifulSoup -from werkzeug.exceptions import BadRequest from canonicalwebteam.discourse import ( DiscourseAPI, - Docs, DocParser, + Docs, ) +from canonicalwebteam.search.models import get_search_results +from canonicalwebteam.search.views import NoAPIKeyError +from geolite2 import geolite2 +from requests import Session +from requests.exceptions import HTTPError +from ubuntu_release_info.data import Data +from werkzeug.exceptions import BadRequest # Local +from webapp.decorators import rate_limit_with_backoff from webapp.login import user_info from webapp.marketo import MarketoAPI @@ -898,7 +898,7 @@ def shorten_acquisition_url(acquisition_url): return new_acquisition_url return acquisition_url - +@rate_limit_with_backoff def marketo_submit(): form_fields = {} for key in flask.request.form: From ac41887070bcba8ad83dddf32844c62846e8b2ed Mon Sep 17 00:00:00 2001 From: Olwe Samuel Date: Fri, 13 Dec 2024 13:18:21 +0300 Subject: [PATCH 02/11] fix: linter issues --- webapp/decorators.py | 9 +++++++-- webapp/views.py | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index 8e99f9b143b..d7a95d1da52 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -47,13 +47,18 @@ def rate_limited(*args, **kwargs): # timestamp from the most recent successful request if initial_request := json.loads(flask.session.get(func.__name__)): # Get the current limit - current_limit = rate_limit_attempt_map.get(initial_request["attempts"]) + current_limit = rate_limit_attempt_map.get( + initial_request["attempts"] + ) time_since_last_request = datetime.now() - datetime.fromtimestamp( initial_request["timestamp"] ) # Abort if the time is too early for this number of attempts - if time_since_last_request.total_seconds() < current_limit.total_seconds(): + if ( + time_since_last_request.total_seconds() + < current_limit.total_seconds() + ): # Increment the number of attempts. 3 is a hard upper limit. if initial_request["attempts"] < ATTEMPT_LIMIT: initial_request["attempts"] += 1 diff --git a/webapp/views.py b/webapp/views.py index b9636896c9a..7ca98189bf5 100644 --- a/webapp/views.py +++ b/webapp/views.py @@ -898,6 +898,7 @@ def shorten_acquisition_url(acquisition_url): return new_acquisition_url return acquisition_url + @rate_limit_with_backoff def marketo_submit(): form_fields = {} From 4ffaa7a9dc09ab384c249a17cf3178bfb6bd1231 Mon Sep 17 00:00:00 2001 From: Olwe Samuel Date: Fri, 13 Dec 2024 14:08:25 +0300 Subject: [PATCH 03/11] Suppress errors when timestamp not set in session --- webapp/decorators.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index d7a95d1da52..6fece91f4c7 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -1,4 +1,5 @@ # Core packages +import contextlib import functools import json from datetime import datetime, timedelta @@ -30,14 +31,14 @@ def rate_limit_with_backoff(func: Callable): """ Decorator to rate limit function calls based on the users' session. The rate limit restricts users to: - - 1 request every 2 seconds - - 2 request every 4 seconds - - 3 request every 8 seconds + - 1 request every 8 seconds + - 2 request every 16 seconds + - 3 request every 32 seconds """ rate_limit_attempt_map = { - 1: timedelta(seconds=2), - 2: timedelta(seconds=4), - 3: timedelta(seconds=8), + 1: timedelta(seconds=8), + 2: timedelta(seconds=16), + 3: timedelta(seconds=32), } ATTEMPT_LIMIT = 3 @@ -45,7 +46,8 @@ def rate_limit_with_backoff(func: Callable): def rate_limited(*args, **kwargs): # Get the initial request timestamp, or update the session with the # timestamp from the most recent successful request - if initial_request := json.loads(flask.session.get(func.__name__)): + with contextlib.suppress(TypeError): + initial_request = json.loads(flask.session.get(func.__name__)) # Get the current limit current_limit = rate_limit_attempt_map.get( initial_request["attempts"] From e2f9c1c6f2f4ac8779bfe92fd63a6d046c0e1d6b Mon Sep 17 00:00:00 2001 From: Olwe Samuel Date: Mon, 16 Dec 2024 10:55:04 +0300 Subject: [PATCH 04/11] Update decorators.py --- webapp/decorators.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index 6fece91f4c7..047e596e487 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -64,9 +64,7 @@ def rate_limited(*args, **kwargs): # Increment the number of attempts. 3 is a hard upper limit. if initial_request["attempts"] < ATTEMPT_LIMIT: initial_request["attempts"] += 1 - flask.session[func.__name__] = json.dumps( - initial_request["attempts"] - ) + flask.session[func.__name__] = json.dumps(initial_request) return flask.abort(429) From ec641bb641e133a665110ee8739eb94be0c73980 Mon Sep 17 00:00:00 2001 From: Samuel Olwe Date: Fri, 20 Dec 2024 20:32:13 +0300 Subject: [PATCH 05/11] feat: allow passing rate limit params --- webapp/decorators.py | 87 ++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index 047e596e487..e27f48be20a 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -1,13 +1,11 @@ # Core packages -import contextlib import functools import json from datetime import datetime, timedelta -from typing import Callable +from typing import Callable, Optional # Third party packages import flask - from webapp.login import user_info @@ -27,51 +25,70 @@ def is_user_logged_in(*args, **kwargs): return is_user_logged_in -def rate_limit_with_backoff(func: Callable): +def rate_limit_with_backoff( + func: Callable, limits: Optional[tuple[int, int]] = None +) -> Callable: """ - Decorator to rate limit function calls based on the users' session. - The rate limit restricts users to: - - 1 request every 8 seconds - - 2 request every 16 seconds - - 3 request every 32 seconds + Decorator to rate limit function calls based on the users' + session. The default rate limit restricts users to: + - 1 request every 4 seconds + - 4 requests every 60 seconds + + This can be overwritten with the limits argument e.g. + @rate_limit_with_backoff(limits=(1, 10)) + for 1 request every 10 seconds. + + @param func: Function to decorate + @param limits: Tuple of (requests, seconds) request limit mappings """ + rate_limit_attempt_map = { - 1: timedelta(seconds=8), - 2: timedelta(seconds=16), - 3: timedelta(seconds=32), + 1: timedelta(seconds=4), + 4: timedelta(seconds=60), } - ATTEMPT_LIMIT = 3 + + if limits: + additional_limits = {i: timedelta(seconds=j) for i, j in limits} + rate_limit_attempt_map = additional_limits + + # The request limit is derived from the limit attempt map + request_limit = max(sorted(rate_limit_attempt_map.keys(), reverse=True)) @functools.wraps(func) def rate_limited(*args, **kwargs): - # Get the initial request timestamp, or update the session with the - # timestamp from the most recent successful request - with contextlib.suppress(TypeError): - initial_request = json.loads(flask.session.get(func.__name__)) + try: + # Get the initial request + initial_request = json.loads(flask.session[func.__name__]) # Get the current limit - current_limit = rate_limit_attempt_map.get( - initial_request["attempts"] - ) + seconds_limit = rate_limit_attempt_map.get(request_limit) + for limit in sorted(rate_limit_attempt_map.keys()): + seconds_limit = rate_limit_attempt_map.get(limit) + if limit > initial_request["attempts"]: + break time_since_last_request = datetime.now() - datetime.fromtimestamp( initial_request["timestamp"] ) # Abort if the time is too early for this number of attempts - if ( - time_since_last_request.total_seconds() - < current_limit.total_seconds() - ): - # Increment the number of attempts. 3 is a hard upper limit. - if initial_request["attempts"] < ATTEMPT_LIMIT: - initial_request["attempts"] += 1 - flask.session[func.__name__] = json.dumps(initial_request) - - return flask.abort(429) - - # Set values for a successful request - flask.session[func.__name__] = json.dumps( - {"timestamp": datetime.now().timestamp(), "attempts": 1} - ) + # Or if the max number of attempts has been exceeded + if initial_request["attempts"] >= request_limit: + # Reset the timer if we have exceeded the limit + if ( + time_since_last_request.total_seconds() + < seconds_limit.total_seconds() + ): + return flask.abort(429) + else: + initial_request["timestamp"] = datetime.now() + + # Otherwise update the session + initial_request["attempts"] += 1 + flask.session[func.__name__] = json.dumps(initial_request) + except (KeyError, TypeError): + # Set values for initial request + flask.session[func.__name__] = json.dumps( + {"timestamp": datetime.now().timestamp(), "attempts": 1} + ) return func(*args, **kwargs) return rate_limited From 31d94696bc33bcc86143c7d7b04b99e232705f39 Mon Sep 17 00:00:00 2001 From: Samuel Olwe Date: Fri, 20 Dec 2024 20:38:15 +0300 Subject: [PATCH 06/11] chore: update comments --- webapp/decorators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index e27f48be20a..9a882a584fc 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -59,7 +59,7 @@ def rate_limited(*args, **kwargs): try: # Get the initial request initial_request = json.loads(flask.session[func.__name__]) - # Get the current limit + # Get the seconds limit for these attempts seconds_limit = rate_limit_attempt_map.get(request_limit) for limit in sorted(rate_limit_attempt_map.keys()): seconds_limit = rate_limit_attempt_map.get(limit) @@ -72,13 +72,13 @@ def rate_limited(*args, **kwargs): # Abort if the time is too early for this number of attempts # Or if the max number of attempts has been exceeded if initial_request["attempts"] >= request_limit: - # Reset the timer if we have exceeded the limit if ( time_since_last_request.total_seconds() < seconds_limit.total_seconds() ): return flask.abort(429) else: + # Reset the timer if we have exceeded the limit initial_request["timestamp"] = datetime.now() # Otherwise update the session From 7a5fabe1e03230cc97090f21b41ad1a07f9f179b Mon Sep 17 00:00:00 2001 From: Samuel Olwe Date: Wed, 8 Jan 2025 16:04:01 +0300 Subject: [PATCH 07/11] Add test for rate limiter --- tests/test_decorators.py | 48 ++++++++++++++++++++++++++++++++++++++++ webapp/decorators.py | 3 ++- 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 tests/test_decorators.py diff --git a/tests/test_decorators.py b/tests/test_decorators.py new file mode 100644 index 00000000000..8b61321009c --- /dev/null +++ b/tests/test_decorators.py @@ -0,0 +1,48 @@ +# Packages +from time import sleep +from unittest import TestCase + +import werkzeug + +from webapp.app import app +from webapp.decorators import rate_limit_with_backoff + + +class TestDecorators(TestCase): + def test_rate_limit_with_backoff_blocks_requests(self): + """ + Test that functions generated by rate_limit_with_backoff are rate + limited and that they backoff when the rate limit is exceeded. + """ + + def fn(): + sleep(0.1) + return True + + with app.test_request_context(): + # Limit to calls once every second + rate_limited_fn = rate_limit_with_backoff(fn, (1, 1)) + + # Should raise an exception + with self.assertRaises(werkzeug.exceptions.TooManyRequests): + while True: + rate_limited_fn() + + def test_rate_limit_with_backoff_allows_requests(self): + """ + Test that functions generated by rate_limit_with_backoff are rate + limited and that they backoff when the rate limit is exceeded. + """ + + def fn(): + sleep(0.1) + return True + + with app.test_request_context(): + # Limit to calls once every second + rate_limited_fn = rate_limit_with_backoff(fn, (1, 1)) + + # Should not raise an exception + for _ in range(3): + sleep(1) + rate_limited_fn() diff --git a/webapp/decorators.py b/webapp/decorators.py index 9a882a584fc..11430319fff 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -6,6 +6,7 @@ # Third party packages import flask + from webapp.login import user_info @@ -48,7 +49,7 @@ def rate_limit_with_backoff( } if limits: - additional_limits = {i: timedelta(seconds=j) for i, j in limits} + additional_limits = {limits[0]: timedelta(seconds=limits[1])} rate_limit_attempt_map = additional_limits # The request limit is derived from the limit attempt map From 5f055a1473cff30771173590d3ec47431574e2f2 Mon Sep 17 00:00:00 2001 From: Samuel Olwe Date: Wed, 8 Jan 2025 16:10:56 +0300 Subject: [PATCH 08/11] chore: made backoff linear to 1req/4s --- webapp/decorators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index 11430319fff..476f96d7017 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -45,7 +45,8 @@ def rate_limit_with_backoff( rate_limit_attempt_map = { 1: timedelta(seconds=4), - 4: timedelta(seconds=60), + 4: timedelta(seconds=16), + 16: timedelta(seconds=64), } if limits: From a595d08696768a3f492f8e05379edcba98f849c4 Mon Sep 17 00:00:00 2001 From: Samuel Olwe Date: Wed, 15 Jan 2025 16:35:53 +0300 Subject: [PATCH 09/11] fix: reset timer when request succeeds --- webapp/decorators.py | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index 476f96d7017..a66d20df8d3 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -53,35 +53,28 @@ def rate_limit_with_backoff( additional_limits = {limits[0]: timedelta(seconds=limits[1])} rate_limit_attempt_map = additional_limits - # The request limit is derived from the limit attempt map - request_limit = max(sorted(rate_limit_attempt_map.keys(), reverse=True)) - @functools.wraps(func) def rate_limited(*args, **kwargs): try: # Get the initial request initial_request = json.loads(flask.session[func.__name__]) - # Get the seconds limit for these attempts - seconds_limit = rate_limit_attempt_map.get(request_limit) for limit in sorted(rate_limit_attempt_map.keys()): - seconds_limit = rate_limit_attempt_map.get(limit) + # Get the seconds limit for these attempts if limit > initial_request["attempts"]: + seconds_limit = rate_limit_attempt_map.get(limit) + time_since_last_request = datetime.now() - datetime.fromtimestamp( + initial_request["timestamp"] + ) + # Abort if the time is too early for this number of attempts + if ( + time_since_last_request.total_seconds() + < seconds_limit.total_seconds() + ): + return flask.abort(429) break - time_since_last_request = datetime.now() - datetime.fromtimestamp( - initial_request["timestamp"] - ) - # Abort if the time is too early for this number of attempts - # Or if the max number of attempts has been exceeded - if initial_request["attempts"] >= request_limit: - if ( - time_since_last_request.total_seconds() - < seconds_limit.total_seconds() - ): - return flask.abort(429) - else: - # Reset the timer if we have exceeded the limit - initial_request["timestamp"] = datetime.now() + # Reset the timestamp if the request succeeds + initial_request["timestamp"] = datetime.now() # Otherwise update the session initial_request["attempts"] += 1 From 34955f0313854f6673f0f2faf6cc46a7fefdcb5f Mon Sep 17 00:00:00 2001 From: Samuel Olwe Date: Wed, 15 Jan 2025 16:55:36 +0300 Subject: [PATCH 10/11] chore: update comments --- webapp/decorators.py | 7 ++++--- webapp/views.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index a66d20df8d3..649b4828edf 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -62,10 +62,11 @@ def rate_limited(*args, **kwargs): # Get the seconds limit for these attempts if limit > initial_request["attempts"]: seconds_limit = rate_limit_attempt_map.get(limit) - time_since_last_request = datetime.now() - datetime.fromtimestamp( - initial_request["timestamp"] + time_since_last_request = ( + datetime.now() + - datetime.fromtimestamp(initial_request["timestamp"]) ) - # Abort if the time is too early for this number of attempts + # Abort if the request is too soon if ( time_since_last_request.total_seconds() < seconds_limit.total_seconds() diff --git a/webapp/views.py b/webapp/views.py index 7ca98189bf5..aacaef69c02 100644 --- a/webapp/views.py +++ b/webapp/views.py @@ -5,7 +5,7 @@ import math import os import re -from urllib.parse import quote +from urllib.parse import quote, unquote # Packages import dateutil From be425fa55d1876be413aad202cd516fc4e574348 Mon Sep 17 00:00:00 2001 From: Samuel Olwe Date: Wed, 15 Jan 2025 18:05:35 +0300 Subject: [PATCH 11/11] fix: match first request --- webapp/decorators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webapp/decorators.py b/webapp/decorators.py index 649b4828edf..7e486fc3212 100644 --- a/webapp/decorators.py +++ b/webapp/decorators.py @@ -60,7 +60,7 @@ def rate_limited(*args, **kwargs): initial_request = json.loads(flask.session[func.__name__]) for limit in sorted(rate_limit_attempt_map.keys()): # Get the seconds limit for these attempts - if limit > initial_request["attempts"]: + if limit >= initial_request["attempts"]: seconds_limit = rate_limit_attempt_map.get(limit) time_since_last_request = ( datetime.now()