From 1aed8535ac15202da3b3fdfbf94f5888819acbcd Mon Sep 17 00:00:00 2001 From: Sergey Yaksanov Date: Sun, 1 Dec 2024 14:16:09 +0300 Subject: [PATCH 1/2] feat: added alertmanager, send alerts to telegram chat --- .env.example | 3 ++ Dockerfile | 4 +++ Makefile | 17 ++++++++++- alerts/__init__.py | 0 alerts/admin.py | 0 alerts/apps.py | 6 ++++ alerts/migrations/__init__.py | 0 alerts/urls.py | 8 ++++++ alerts/views.py | 54 +++++++++++++++++++++++++++++++++++ docker-compose.yml | 12 +++++++- procollab/celery.py | 2 -- procollab/settings.py | 13 +++++++++ procollab/urls.py | 7 +++-- prometheus/alertmanager.yml | 10 +++++++ prometheus/alerts.yml | 19 ++++++++++++ prometheus/prometheus.yml | 18 ++++++++++-- users/metrics.py | 3 ++ users/views.py | 22 ++++++++++++-- 18 files changed, 185 insertions(+), 13 deletions(-) create mode 100644 alerts/__init__.py create mode 100644 alerts/admin.py create mode 100644 alerts/apps.py create mode 100644 alerts/migrations/__init__.py create mode 100644 alerts/urls.py create mode 100644 alerts/views.py create mode 100644 prometheus/alertmanager.yml create mode 100644 prometheus/alerts.yml create mode 100644 users/metrics.py diff --git a/.env.example b/.env.example index 13e8e71d..d70e68e1 100644 --- a/.env.example +++ b/.env.example @@ -27,3 +27,6 @@ TELEGRAM_CHANNEL= CLICKUP_API_TOKEN= CLICKUP_SPACE_ID= + +ALERTMANAGER_TELEGRAM_TOKEN= +ALERTMANAGER_TELEGRAM_CHAT_ID= \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 516d3605..b44875e8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,10 @@ FROM python:3.11 RUN apt update --no-install-recommends -y +RUN apt-get update && \ + apt-get install -y cmake && \ + rm -rf /var/lib/apt/lists/* + ENV PYTHONFAULTHANDLER=1 \ PYTHONUNBUFFERED=1 \ PYTHONHASHSEED=random \ diff --git a/Makefile b/Makefile index 5832d918..1f1bceb7 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,19 @@ up: docker compose -f docker-compose.yml up -d down: - docker compose -f docker-compose.yml down \ No 
newline at end of file + docker compose -f docker-compose.yml down + +build: + docker compose -f docker-compose.yml build + +superuser: + docker exec -it web poetry run python manage.py createsuperuser + +migrate: + docker exec -it web poetry run python manage.py migrate + +migrations: + docker exec -it web poetry run python manage.py makemigrations + +logs: + docker container logs web \ No newline at end of file diff --git a/alerts/__init__.py b/alerts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/alerts/admin.py b/alerts/admin.py new file mode 100644 index 00000000..e69de29b diff --git a/alerts/apps.py b/alerts/apps.py new file mode 100644 index 00000000..2be23131 --- /dev/null +++ b/alerts/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class AlertsConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "alerts" diff --git a/alerts/migrations/__init__.py b/alerts/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/alerts/urls.py b/alerts/urls.py new file mode 100644 index 00000000..faeb8b1d --- /dev/null +++ b/alerts/urls.py @@ -0,0 +1,8 @@ +from django.urls import path +from .views import alert_webhook + +app_name = "alerts" + +urlpatterns = [ + path("webhook/", alert_webhook, name="alert_webhook"), +] diff --git a/alerts/views.py b/alerts/views.py new file mode 100644 index 00000000..81234c8a --- /dev/null +++ b/alerts/views.py @@ -0,0 +1,54 @@ +import json +import logging +import requests +import socket +from django.conf import settings +from django.http import HttpResponseForbidden +from django.http import JsonResponse +from django.views.decorators.csrf import csrf_exempt + +logger = logging.getLogger(__name__) + + +# todo: refactor this + + +def allow_alertmanager_only(view_func):  # allow only the "alertmanager" host IP + def _wrapped_view(request, *args, **kwargs): + alertmanager_ip = socket.gethostbyname("alertmanager") + + client_ip = request.META["REMOTE_ADDR"] + logger.debug("webhook from %s (alertmanager=%s)", client_ip, 
alertmanager_ip) + if client_ip == alertmanager_ip: + return view_func(request, *args, **kwargs) + + return HttpResponseForbidden("Forbidden") + + return _wrapped_view + + +@csrf_exempt +@allow_alertmanager_only +def alert_webhook(request):  # Alertmanager webhook: relay each alert to Telegram + if request.method == "POST": + try: + payload = json.loads(request.body) + for alert in payload["alerts"]: + message = f"Alert: {alert['annotations']['summary']} - {alert['status']}" + send_telegram_message(message) + + return JsonResponse({"status": "success"}) + except Exception as exc: + logger.error(f"Failed to process alert {exc}", exc_info=exc) + return JsonResponse({"status": "error"}, status=400) + + return JsonResponse({"status": "method not allowed"}, status=405) + + +def send_telegram_message(message):  # POST text via Telegram Bot API, return JSON + url = ( + f"https://api.telegram.org/bot{settings.ALERTMANAGER_TELEGRAM_TOKEN}/sendMessage" + ) + data = {"chat_id": settings.ALERTMANAGER_TELEGRAM_CHAT_ID, "text": message} + response = requests.post(url, data=data, timeout=10)  # never block forever + return response.json() diff --git a/docker-compose.yml b/docker-compose.yml index 0bbeda3c..fa0aba95 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,13 +10,14 @@ services: command: bash ./scripts/startup.sh volumes: - ./log:/procollab/log + - ./db.sqlite3:/procollab/db.sqlite3 + - ./:/procollab env_file: - .env environment: HOST: 0.0.0.0 expose: - 8000 - grafana: image: grafana/grafana-enterprise container_name: grafana @@ -36,6 +37,15 @@ services: - prom-data:/prometheus - ./prometheus:/etc/prometheus + alertmanager: + image: prom/alertmanager:latest + container_name: alertmanager + volumes: + - ./prometheus/alertmanager.yml:/etc/alertmanager/alertmanager.yml + command: + - '--config.file=/etc/alertmanager/alertmanager.yml' + ports: + - '9093:9093' nginx: container_name: nginx diff --git a/procollab/celery.py b/procollab/celery.py index f56806c8..ea7bbd56 100644 --- a/procollab/celery.py +++ b/procollab/celery.py @@ -1,13 +1,11 @@ import os from celery import Celery -import django # 
from celery.schedules import crontab from celery.schedules import crontab os.environ.setdefault("DJANGO_SETTINGS_MODULE", "procollab.settings") -django.setup() app = Celery("procollab") diff --git a/procollab/settings.py b/procollab/settings.py index 4a84f7d7..e69a2e38 100644 --- a/procollab/settings.py +++ b/procollab/settings.py @@ -28,6 +28,10 @@ CSRF_TRUSTED_ORIGINS = [ "http://localhost:8000", + "http://localhost:9090", + "http://localhost:9093", + "http://alertmanager:9093", + "http://prometheus:9093", "http://127.0.0.1:8000", "http://0.0.0.0:8000", "https://api.procollab.ru", @@ -51,6 +55,8 @@ "web", # From Docker ] +CORS_ALLOW_CREDENTIALS = True + PASSWORD_HASHERS = [ "django.contrib.auth.hashers.BCryptSHA256PasswordHasher", "django.contrib.auth.hashers.BCryptPasswordHasher", @@ -97,6 +103,7 @@ "mailing.apps.MailingConfig", "feed.apps.FeedConfig", "project_rates.apps.ProjectRatesConfig", + "alerts.apps.AlertsConfig", # Rest framework "rest_framework", "rest_framework_simplejwt", @@ -403,3 +410,9 @@ CELERY_ACCEPT_CONTENT = ["application/json"] CELERY_RESULT_SERIALIZER = "json" CELERY_TASK_SERIALIZER = "json" + +# Alertmanager + +ALERTMANAGER_TELEGRAM_TOKEN = config("ALERTMANAGER_TELEGRAM_TOKEN", cast=str) + +ALERTMANAGER_TELEGRAM_CHAT_ID = config("ALERTMANAGER_TELEGRAM_CHAT_ID", cast=int) diff --git a/procollab/urls.py b/procollab/urls.py index 2a59172e..16070924 100644 --- a/procollab/urls.py +++ b/procollab/urls.py @@ -5,11 +5,11 @@ from drf_yasg import openapi from drf_yasg.views import get_schema_view from rest_framework_simplejwt.views import ( - TokenObtainPairView, TokenRefreshView, TokenVerifyView, ) from core.permissions import IsStaffOrReadOnly +from users.views import GetJWTToken schema_view = get_schema_view( openapi.Info( @@ -50,12 +50,13 @@ path("programs/", include("partner_programs.urls", namespace="partner_programs")), path("rate-project/", include(("project_rates.urls", "rate_projects"))), path("feed/", include("feed.urls", 
namespace="feed")), - path("api/token/", TokenObtainPairView.as_view(), name="token_obtain_pair"), + path("alerts/", include("alerts.urls", namespace="alerts")), + path("api/token/", GetJWTToken.as_view(), name="token_obtain_pair"), path("api/token/refresh/", TokenRefreshView.as_view(), name="token_refresh"), path("api/token/verify/", TokenVerifyView.as_view(), name="token_verify"), path("", include("metrics.urls", namespace="metrics")), - path("django_prometheus/", include("django_prometheus.urls")), path("anymail/", include("anymail.urls")), + path("django_prometheus/", include("django_prometheus.urls")), ] if settings.DEBUG: diff --git a/prometheus/alertmanager.yml b/prometheus/alertmanager.yml new file mode 100644 index 00000000..2299a690 --- /dev/null +++ b/prometheus/alertmanager.yml @@ -0,0 +1,10 @@ +global: + resolve_timeout: 10s + +route: + receiver: telegram + +receivers: +- name: telegram + webhook_configs: + - url: http://web:8000/alerts/webhook/ diff --git a/prometheus/alerts.yml b/prometheus/alerts.yml new file mode 100644 index 00000000..65085740 --- /dev/null +++ b/prometheus/alerts.yml @@ -0,0 +1,19 @@ +groups: +- name: example + rules: + - alert: SpikeInTokenRequests + expr: rate(get_token_counter_total[1m]) > 3 * rate(get_token_counter_total[5m]) + for: 5m + labels: + severity: critical + annotations: + summary: "Всплеск числа запросов на получение токенов" + description: "Число запросов на получение токенов увеличилось более чем в три раза по сравнению со средним значением за последние 5 минут." + - alert: HighErrorRate + expr: increase(django_http_responses_total_by_status_view_method_total{status=~"5.."}[5m]) / increase(django_http_responses_total_by_status_view_method_total[5m]) > 0.1 + for: 1s + labels: + severity: critical + annotations: + summary: "Высокий уровень 5xx ошибок на {{ $labels.instance }}" + description: "Уровень ошибок {{ $labels.instance }} превышает 10% за последние 5 минут." 
\ No newline at end of file diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index 35fe0993..1e413928 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -1,10 +1,22 @@ global: - scrape_interval: 15s - evaluation_interval: 15s + scrape_interval: 5s + evaluation_interval: 5s + +rule_files: + - alerts.yml + + +alerting: + alertmanagers: + - static_configs: + - targets: + - 'alertmanager:9093' + scrape_configs: - job_name: monitoring metrics_path: /django_prometheus/metrics static_configs: - targets: - - web:8000 \ No newline at end of file + - web:8000 + diff --git a/users/metrics.py b/users/metrics.py new file mode 100644 index 00000000..93369e89 --- /dev/null +++ b/users/metrics.py @@ -0,0 +1,3 @@ +from prometheus_client import Counter + +GET_TOKEN_COUNTER = Counter("get_token_counter", "Total count of get jwt token calls") diff --git a/users/views.py b/users/views.py index 81600dbc..c3f4f5db 100644 --- a/users/views.py +++ b/users/views.py @@ -54,6 +54,7 @@ VERIFY_EMAIL_REDIRECT_URL, OnboardingStage, ) +from users.metrics import GET_TOKEN_COUNTER from users.models import UserAchievement, LikesOnProject, UserSkillConfirmation from users.permissions import IsAchievementOwnerOrReadOnly from users.serializers import ( @@ -82,7 +83,16 @@ from .schema import USER_PK_PARAM, SKILL_PK_PARAM from .tasks import send_mail_cv +from rest_framework_simplejwt.views import ( + TokenObtainPairView, +) + +import logging + +logger = logging.getLogger(__name__) + User = get_user_model() + Project = apps.get_model("projects", "Project") @@ -614,9 +624,7 @@ def get(self, request, *args, **kwargs) -> HttpResponse: data_preparer = UserCVDataPreparerV2(request.user.pk) user_cv_data: UserCVDataV2 = data_preparer.get_prepared_data() - html_string: str = render_to_string( - data_preparer.TEMPLATE_PATH, user_cv_data - ) + html_string: str = render_to_string(data_preparer.TEMPLATE_PATH, user_cv_data) binary_pdf_file: bytes | None = 
HTML(string=html_string).write_pdf() encoded_filename: str = urllib.parse.quote( @@ -635,6 +643,7 @@ class UserCVMailing(APIView): Full-fledged work `UserCVDownload`. The user can send a letter once per minute. """ + permission_classes = [IsAuthenticated] def get(self, request, *args, **kwargs): @@ -658,3 +667,10 @@ def get(self, request, *args, **kwargs): cache.set(cache_key, timezone.now(), timeout=cooldown_time) return Response(data={"detail": "success"}, status=status.HTTP_200_OK) + + +class GetJWTToken(TokenObtainPairView):  # token view that also bumps GET_TOKEN_COUNTER + def post(self, request: Request, *args, **kwargs) -> Response: + # fixme: this is a test metric, to be removed later + GET_TOKEN_COUNTER.inc() + return super().post(request, *args, **kwargs) From 829c4568b68934d5ab82c41bc72c5df48a14e000 Mon Sep 17 00:00:00 2001 From: Sergey Yaksanov Date: Sun, 1 Dec 2024 14:19:58 +0300 Subject: [PATCH 2/2] feat: add defaults to env vars --- procollab/settings.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/procollab/settings.py b/procollab/settings.py index e69a2e38..83aac140 100644 --- a/procollab/settings.py +++ b/procollab/settings.py @@ -413,6 +413,8 @@ # Alertmanager -ALERTMANAGER_TELEGRAM_TOKEN = config("ALERTMANAGER_TELEGRAM_TOKEN", cast=str) +ALERTMANAGER_TELEGRAM_TOKEN = config("ALERTMANAGER_TELEGRAM_TOKEN", cast=str, default="") -ALERTMANAGER_TELEGRAM_CHAT_ID = config("ALERTMANAGER_TELEGRAM_CHAT_ID", cast=int) +ALERTMANAGER_TELEGRAM_CHAT_ID = config( + "ALERTMANAGER_TELEGRAM_CHAT_ID", cast=int, default=0 +)