diff --git a/poetry.lock b/poetry.lock index beace8899f..c6558b1e95 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -966,7 +966,7 @@ files = [ {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, ] -markers = {main = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"", dev = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and python_version < \"3.12\""} +markers = {dev = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and python_version < \"3.12\""} [package.dependencies] zipp = ">=0.5" @@ -1681,10 +1681,9 @@ nicer-shell = ["ipython"] name = "opentelemetry-api" version = "1.34.1" description = "OpenTelemetry Python API" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"" files = [ {file = "opentelemetry_api-1.34.1-py3-none-any.whl", hash = "sha256:b7df4cb0830d5a6c29ad0c0691dbae874d8daefa934b8b1d642de48323d32a8c"}, {file = "opentelemetry_api-1.34.1.tar.gz", hash = "sha256:64f0bd06d42824843731d05beea88d4d4b6ae59f9fe347ff7dfa2cc14233bbb3"}, @@ -1774,6 +1773,23 @@ opentelemetry-sdk = ">=1.34.1,<1.35.0" requests = ">=2.7,<3.0" typing-extensions = ">=4.5.0" +[[package]] +name = "opentelemetry-exporter-prometheus" +version = "0.55b1" +description = "Prometheus Metric Exporter for OpenTelemetry" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "opentelemetry_exporter_prometheus-0.55b1-py3-none-any.whl", hash = "sha256:f364fbbff9e5de37a112ff104d1185fb1d7e2046c5ab5911e5afebc7ab3ddf0e"}, + {file = "opentelemetry_exporter_prometheus-0.55b1.tar.gz", hash = "sha256:d13ec0b22bf394113ff1ada5da98133a4b051779b803dae183188e26c4bd9ee0"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-sdk = ">=1.34.1,<1.35.0" +prometheus-client = ">=0.5.0,<1.0.0" + [[package]] name = "opentelemetry-opentracing-shim" version = "0.55b1" @@ -1812,10 +1828,9 @@ protobuf = ">=5.0,<6.0" name = "opentelemetry-sdk" version = "1.34.1" description = "OpenTelemetry Python SDK" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"" files = [ {file = "opentelemetry_sdk-1.34.1-py3-none-any.whl", hash = "sha256:308effad4059562f1d92163c61c8141df649da24ce361827812c40abb2a1e96e"}, {file = "opentelemetry_sdk-1.34.1.tar.gz", hash = "sha256:8091db0d763fcd6098d4781bbc80ff0971f94e260739aa6afe6fd379cdf3aa4d"}, @@ -1830,10 +1845,9 @@ typing-extensions = ">=4.5.0" name = "opentelemetry-semantic-conventions" version = "0.55b1" description = "OpenTelemetry Semantic Conventions" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"" files = [ {file = "opentelemetry_semantic_conventions-0.55b1-py3-none-any.whl", hash = "sha256:5da81dfdf7d52e3d37f8fe88d5e771e191de924cfff5f550ab0b8f7b2409baed"}, {file = "opentelemetry_semantic_conventions-0.55b1.tar.gz", hash = "sha256:ef95b1f009159c28d7a7849f5cbc71c4c34c845bb514d66adfdf1b3fff3598b3"}, @@ -3578,7 +3592,7 @@ files = [ {file = "zipp-3.19.1-py3-none-any.whl", hash = "sha256:2828e64edb5386ea6a52e7ba7cdb17bb30a73a858f5eb6eb93d8d36f5ea26091"}, {file = "zipp-3.19.1.tar.gz", hash = "sha256:35427f6d5594f4acf82d25541438348c26736fa9b3afa2754bcd63cdb99d8e8f"}, ] -markers = {main = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"", dev = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and python_version < \"3.12\""} +markers = {dev = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and python_version < \"3.12\""} [package.extras] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] @@ -3690,4 +3704,4 @@ url-preview = ["lxml"] [metadata] lock-version = "2.1" python-versions = "^3.10.0" -content-hash = "403b3be5269c4c6e9b509976307f40ab198d4db3c1637d8403b3f5f53d43ea36" +content-hash = "a4adc425ae84e5b175bca28f76ff57c6f051fe8d62c03efc4398c6ef9e207c40" diff --git a/pyproject.toml b/pyproject.toml index 117e1e0b5b..fbed197dc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -238,6 +238,7 @@ pydantic = [ { version = "~=2.8", python = "<3.14" }, { version = "~=2.12", python = ">=3.14" }, ] +opentelemetry-exporter-prometheus = "0.55b1" # This is for building the rust components during "poetry install", which # currently ignores the `build-system.requires` directive (c.f. diff --git a/synapse/api/auth/__init__.py b/synapse/api/auth/__init__.py index cc0c0d4601..4d2a736a79 100644 --- a/synapse/api/auth/__init__.py +++ b/synapse/api/auth/__init__.py @@ -20,13 +20,11 @@ # from typing import TYPE_CHECKING, Optional, Protocol -from prometheus_client import Histogram - from twisted.web.server import Request from synapse.appservice import ApplicationService from synapse.http.site import SynapseRequest -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram from synapse.types import Requester if TYPE_CHECKING: @@ -36,7 +34,7 @@ GUEST_DEVICE_ID = "guest_device" -introspection_response_timer = Histogram( +introspection_response_timer = SynapseHistogram( "synapse_api_auth_delegated_introspection_response", "Time taken to get a response for an introspection request", labelnames=["code", SERVER_NAME_LABEL], diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 13a0e3db7c..6931e0f875 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -24,11 +24,9 @@ import sys from typing import TYPE_CHECKING, Mapping, Sized -from prometheus_client import Gauge - from twisted.internet import defer -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.types import JsonDict from synapse.util.constants import ( MILLISECONDS_PER_SECOND, @@ -57,22 +55,22 @@ _stats_process: list[tuple[int, "resource.struct_rusage"]] = [] # Gauges to expose monthly active user control metrics -current_mau_gauge = Gauge( +current_mau_gauge = SynapseGauge( "synapse_admin_mau_current", "Current MAU", labelnames=[SERVER_NAME_LABEL], ) -current_mau_by_service_gauge = Gauge( +current_mau_by_service_gauge = SynapseGauge( "synapse_admin_mau_current_mau_by_service", "Current MAU by service", labelnames=["app_service", SERVER_NAME_LABEL], ) -max_mau_gauge = Gauge( +max_mau_gauge = SynapseGauge( "synapse_admin_mau_max", "MAU Limit", labelnames=[SERVER_NAME_LABEL], ) -registered_reserved_users_mau_gauge = Gauge( +registered_reserved_users_mau_gauge = SynapseGauge( "synapse_admin_mau_registered_reserved_users", "Registered users with reserved threepids", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index f08a921998..80b9c6b81d 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -31,7 +31,6 @@ Union, ) -from prometheus_client import Counter from typing_extensions import ParamSpec, TypeGuard from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind @@ -45,7 +44,7 @@ from synapse.events.utils import SerializeEventConfig, serialize_event from synapse.http.client import SimpleHttpClient, is_unknown_endpoint from synapse.logging import opentracing -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.types import DeviceListUpdates, JsonDict, JsonMapping, ThirdPartyInstanceID from synapse.util.caches.response_cache import ResponseCache @@ -54,31 +53,31 @@ logger = logging.getLogger(__name__) -sent_transactions_counter = Counter( +sent_transactions_counter = SynapseCounter( "synapse_appservice_api_sent_transactions", "Number of /transactions/ requests sent", labelnames=["service", SERVER_NAME_LABEL], ) -failed_transactions_counter = Counter( +failed_transactions_counter = SynapseCounter( "synapse_appservice_api_failed_transactions", "Number of /transactions/ requests that failed to send", labelnames=["service", SERVER_NAME_LABEL], ) -sent_events_counter = Counter( +sent_events_counter = SynapseCounter( "synapse_appservice_api_sent_events", "Number of events sent to the AS", labelnames=["service", SERVER_NAME_LABEL], ) -sent_ephemeral_counter = Counter( +sent_ephemeral_counter = SynapseCounter( "synapse_appservice_api_sent_ephemeral", "Number of ephemeral events sent to the AS", labelnames=["service", SERVER_NAME_LABEL], ) -sent_todevice_counter = Counter( +sent_todevice_counter = SynapseCounter( "synapse_appservice_api_sent_todevice", "Number of todevice messages sent to the AS", labelnames=["service", SERVER_NAME_LABEL], diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index cb2fa59f54..db2128dbf5 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -41,7 +41,6 @@ ) import attr -from prometheus_client import Counter from synapse.api.constants import Direction, EventContentFields, EventTypes, Membership from synapse.api.errors import ( @@ -71,7 +70,7 @@ from synapse.http.client import is_unknown_endpoint from synapse.http.types import QueryParams from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.types import JsonDict, StrCollection, UserID, get_domain_from_id from synapse.types.handlers.policy_server import RECOMMENDATION_OK, RECOMMENDATION_SPAM from synapse.util.async_helpers import concurrently_execute @@ -83,7 +82,7 @@ logger = logging.getLogger(__name__) -sent_queries_counter = Counter( +sent_queries_counter = SynapseCounter( "synapse_federation_client_sent_queries", "", labelnames=["type", SERVER_NAME_LABEL] ) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9d1a981d97..743866970f 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -32,8 +32,6 @@ Union, ) -from prometheus_client import Counter, Gauge, Histogram - from twisted.python import failure from synapse.api.constants import ( @@ -79,7 +77,12 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import ( + SERVER_NAME_LABEL, + SynapseCounter, + SynapseGauge, + SynapseHistogram, +) from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -102,27 +105,27 @@ logger = logging.getLogger(__name__) -received_pdus_counter = Counter( +received_pdus_counter = SynapseCounter( "synapse_federation_server_received_pdus", "", labelnames=[SERVER_NAME_LABEL] ) -received_edus_counter = Counter( +received_edus_counter = SynapseCounter( "synapse_federation_server_received_edus", "", labelnames=[SERVER_NAME_LABEL] ) -received_queries_counter = Counter( +received_queries_counter = SynapseCounter( "synapse_federation_server_received_queries", "", labelnames=["type", SERVER_NAME_LABEL], ) -pdu_process_time = Histogram( +pdu_process_time = SynapseHistogram( "synapse_federation_server_pdu_process_time", "Time taken to process an event", labelnames=[SERVER_NAME_LABEL], ) -last_pdu_ts_metric = Gauge( +last_pdu_ts_metric = SynapseGauge( "synapse_federation_last_received_pdu_time", "The timestamp of the last PDU which was successfully received from the given domain", labelnames=("origin_server_name", SERVER_NAME_LABEL), diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 229ae647c0..e30f75f031 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -142,7 +142,6 @@ ) import attr -from prometheus_client import Counter from twisted.internet import defer @@ -160,6 +159,7 @@ from synapse.metrics import ( SERVER_NAME_LABEL, LaterGauge, + SynapseCounter, event_processing_loop_counter, event_processing_loop_room_count, events_processed_counter, @@ -184,13 +184,13 @@ logger = logging.getLogger(__name__) -sent_pdus_destination_dist_count = Counter( +sent_pdus_destination_dist_count = SynapseCounter( "synapse_federation_client_sent_pdu_destinations_count", "Number of PDUs queued for sending to one or more destinations", labelnames=[SERVER_NAME_LABEL], ) -sent_pdus_destination_dist_total = Counter( +sent_pdus_destination_dist_total = SynapseCounter( "synapse_federation_client_sent_pdu_destinations", "Total number of PDUs queued for sending across all destinations", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index ecf4789d76..1b69d26333 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -26,7 +26,6 @@ from typing import TYPE_CHECKING, Hashable, Iterable, Optional import attr -from prometheus_client import Counter from twisted.internet import defer @@ -42,7 +41,7 @@ from synapse.handlers.presence import format_user_presence_state from synapse.logging import issue9533_logger from synapse.logging.opentracing import SynapseTags, set_tag -from synapse.metrics import SERVER_NAME_LABEL, sent_transactions_counter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, sent_transactions_counter from synapse.types import JsonDict, ReadReceipt from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter from synapse.visibility import filter_events_for_server @@ -56,13 +55,13 @@ logger = logging.getLogger(__name__) -sent_edus_counter = Counter( +sent_edus_counter = SynapseCounter( "synapse_federation_client_sent_edus", "Total number of EDUs successfully sent", labelnames=[SERVER_NAME_LABEL], ) -sent_edus_by_type = Counter( +sent_edus_by_type = SynapseCounter( "synapse_federation_client_sent_edus_by_type", "Number of sent EDUs successfully sent, by event type", labelnames=["type", SERVER_NAME_LABEL], diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 99aa05ebd6..47315527db 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -20,8 +20,6 @@ import logging from typing import TYPE_CHECKING -from prometheus_client import Gauge - from synapse.api.constants import EduTypes from synapse.api.errors import HttpResponseException from synapse.events import EventBase @@ -34,7 +32,7 @@ tags, whitelisted_homeserver, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.types import JsonDict from synapse.util.json import json_decoder from synapse.util.metrics import measure_func @@ -45,7 +43,7 @@ logger = logging.getLogger(__name__) issue_8631_logger = logging.getLogger("synapse.8631_debug") -last_pdu_ts_metric = Gauge( +last_pdu_ts_metric = SynapseGauge( "synapse_federation_last_sent_pdu_time", "The timestamp of the last PDU which was successfully sent to the given domain", labelnames=("destination_server_name", SERVER_NAME_LABEL), diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 5240178d80..f7cd3ace85 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -28,8 +28,6 @@ Union, ) -from prometheus_client import Counter - from twisted.internet import defer import synapse @@ -40,6 +38,7 @@ from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics import ( SERVER_NAME_LABEL, + SynapseCounter, event_processing_loop_counter, event_processing_loop_room_count, ) @@ -65,7 +64,7 @@ logger = logging.getLogger(__name__) -events_processed_counter = Counter( +events_processed_counter = SynapseCounter( "synapse_handlers_appservice_events_processed", "", labelnames=[SERVER_NAME_LABEL] ) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 32ef89bab4..9ccd6bffa9 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -41,7 +41,6 @@ import attr import bcrypt import unpaddedbase64 -from prometheus_client import Counter from twisted.internet.defer import CancelledError from twisted.web.server import Request @@ -66,7 +65,7 @@ from synapse.http.server import finish_request, respond_with_html from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.databases.main.registration import ( LoginTokenExpired, @@ -89,7 +88,7 @@ INVALID_USERNAME_OR_PASSWORD = "Invalid username or password" -invalid_login_token_counter = Counter( +invalid_login_token_counter = SynapseCounter( "synapse_user_login_invalid_login_tokens", "Counts the number of rejected m.login.token on /login", labelnames=["reason", SERVER_NAME_LABEL], diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e325a0e209..190fe49547 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -36,7 +36,6 @@ ) import attr -from prometheus_client import Histogram from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json from unpaddedbase64 import decode_base64 @@ -67,7 +66,7 @@ from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram from synapse.module_api import NOT_SPAM from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.invite_rule import InviteRule @@ -83,7 +82,7 @@ logger = logging.getLogger(__name__) # Added to debug performance and track progress on optimizations -backfill_processing_before_timer = Histogram( +backfill_processing_before_timer = SynapseHistogram( "synapse_federation_backfill_processing_before_time_seconds", "sec", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 32b603e947..0e0df3d324 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -32,8 +32,6 @@ Sequence, ) -from prometheus_client import Counter, Histogram - from synapse import event_auth from synapse.api.constants import ( EventContentFields, @@ -76,7 +74,7 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram from synapse.replication.http.federation import ( ReplicationFederationSendEventsRestServlet, ) @@ -102,14 +100,14 @@ logger = logging.getLogger(__name__) -soft_failed_event_counter = Counter( +soft_failed_event_counter = SynapseCounter( "synapse_federation_soft_failed_events_total", "Events received over federation that we marked as soft_failed", labelnames=[SERVER_NAME_LABEL], ) # Added to debug performance and track progress on optimizations -backfill_processing_after_timer = Histogram( +backfill_processing_after_timer = SynapseHistogram( "synapse_federation_backfill_processing_after_time_seconds", "sec", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index d8150a5857..4f366802a7 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -91,8 +91,6 @@ Optional, ) -from prometheus_client import Counter - import synapse.metrics from synapse.api.constants import EduTypes, EventTypes, Membership, PresenceState from synapse.api.errors import SynapseError @@ -100,7 +98,7 @@ from synapse.appservice import ApplicationService from synapse.events.presence_router import PresenceRouter from synapse.logging.context import run_in_background -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter from synapse.metrics.background_process_metrics import ( wrap_as_background_process, ) @@ -131,37 +129,37 @@ logger = logging.getLogger(__name__) -notified_presence_counter = Counter( +notified_presence_counter = SynapseCounter( "synapse_handler_presence_notified_presence", "", labelnames=[SERVER_NAME_LABEL] ) -federation_presence_out_counter = Counter( +federation_presence_out_counter = SynapseCounter( "synapse_handler_presence_federation_presence_out", "", labelnames=[SERVER_NAME_LABEL], ) -presence_updates_counter = Counter( +presence_updates_counter = SynapseCounter( "synapse_handler_presence_presence_updates", "", labelnames=[SERVER_NAME_LABEL] ) -timers_fired_counter = Counter( +timers_fired_counter = SynapseCounter( "synapse_handler_presence_timers_fired", "", labelnames=[SERVER_NAME_LABEL] ) -federation_presence_counter = Counter( +federation_presence_counter = SynapseCounter( "synapse_handler_presence_federation_presence", "", labelnames=[SERVER_NAME_LABEL] ) -bump_active_time_counter = Counter( +bump_active_time_counter = SynapseCounter( "synapse_handler_presence_bump_active_time", "", labelnames=[SERVER_NAME_LABEL] ) -get_updates_counter = Counter( +get_updates_counter = SynapseCounter( "synapse_handler_presence_get_updates", "", labelnames=["type", SERVER_NAME_LABEL] ) -notify_reason_counter = Counter( +notify_reason_counter = SynapseCounter( "synapse_handler_presence_notify_reason", "", labelnames=["locality", "reason", SERVER_NAME_LABEL], ) -state_transition_counter = Counter( +state_transition_counter = SynapseCounter( "synapse_handler_presence_state_transition", "", labelnames=["locality", "from", "to", SERVER_NAME_LABEL], diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 8b620a91bc..89116cae6a 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -30,8 +30,6 @@ TypedDict, ) -from prometheus_client import Counter - from synapse import types from synapse.api.constants import ( MAX_USERID_LENGTH, @@ -50,7 +48,7 @@ from synapse.appservice import ApplicationService from synapse.config.server import is_threepid_reserved from synapse.http.servlet import assert_params_in_dict -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.replication.http.login import RegisterDeviceReplicationServlet from synapse.replication.http.register import ( ReplicationPostRegisterActionsServlet, @@ -65,13 +63,13 @@ logger = logging.getLogger(__name__) -registration_counter = Counter( +registration_counter = SynapseCounter( "synapse_user_registrations_total", "Number of new users registered (since restart)", labelnames=["guest", "shadow_banned", "auth_provider", SERVER_NAME_LABEL], ) -login_counter = Counter( +login_counter = SynapseCounter( "synapse_user_logins_total", "Number of user logins (since restart)", labelnames=["guest", "auth_provider", SERVER_NAME_LABEL], diff --git a/synapse/handlers/sliding_sync/__init__.py b/synapse/handlers/sliding_sync/__init__.py index cea4b857ee..cebe3a2d5f 100644 --- a/synapse/handlers/sliding_sync/__init__.py +++ b/synapse/handlers/sliding_sync/__init__.py @@ -17,7 +17,6 @@ from itertools import chain from typing import TYPE_CHECKING, AbstractSet, Mapping, Optional -from prometheus_client import Histogram from typing_extensions import assert_never from synapse.api.constants import Direction, EventTypes, Membership @@ -38,7 +37,7 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.databases.main.state_deltas import StateDelta from synapse.storage.databases.main.stream import PaginateFunction @@ -77,7 +76,7 @@ logger = logging.getLogger(__name__) -sync_processing_time = Histogram( +sync_processing_time = SynapseHistogram( "synapse_sliding_sync_processing_time", "Time taken to generate a sliding sync response, ignoring wait times.", labelnames=["initial", SERVER_NAME_LABEL], diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index a19b75203b..8d9f4905fc 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -30,7 +30,6 @@ ) import attr -from prometheus_client import Counter from synapse.api.constants import ( AccountDataTypes, @@ -54,7 +53,7 @@ start_active_span, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.storage.databases.main.event_push_actions import RoomNotifCounts from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.databases.main.stream import PaginateFunction @@ -91,7 +90,7 @@ # "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is # "true" or "false" depending on if the request asked for lazy loaded members or # not. -non_empty_sync_counter = Counter( +non_empty_sync_counter = SynapseCounter( "synapse_handlers_sync_nonempty_total", "Count of non empty sync responses. type is initial_sync/full_state_sync" "/incremental_sync. lazy_loaded indicates if lazy loaded members were " diff --git a/synapse/http/client.py b/synapse/http/client.py index ff1f7c7128..2098277053 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -37,7 +37,6 @@ import treq from canonicaljson import encode_canonical_json from netaddr import AddrFormatError, IPAddress, IPSet -from prometheus_client import Counter from zope.interface import implementer from OpenSSL import SSL @@ -81,7 +80,7 @@ from synapse.http.types import QueryParams from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.opentracing import set_tag, start_active_span, tags -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.types import ISynapseReactor, StrSequence from synapse.util.async_helpers import timeout_deferred from synapse.util.clock import Clock @@ -106,10 +105,10 @@ logger = logging.getLogger(__name__) -outgoing_requests_counter = Counter( +outgoing_requests_counter = SynapseCounter( "synapse_http_client_requests", "", labelnames=["method", SERVER_NAME_LABEL] ) -incoming_responses_counter = Counter( +incoming_responses_counter = SynapseCounter( "synapse_http_client_responses", "", labelnames=["method", "code", SERVER_NAME_LABEL], diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 562007c74f..23d6401ad0 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -44,7 +44,6 @@ import attr import treq from canonicaljson import encode_canonical_json -from prometheus_client import Counter from signedjson.sign import sign_json from twisted.internet import defer @@ -84,7 +83,7 @@ from synapse.logging import opentracing from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.opentracing import set_tag, start_active_span, tags -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.types import JsonDict from synapse.util.async_helpers import AwakenableSleeper, Linearizer, timeout_deferred from synapse.util.clock import Clock @@ -97,12 +96,12 @@ logger = logging.getLogger(__name__) -outgoing_requests_counter = Counter( +outgoing_requests_counter = SynapseCounter( "synapse_http_matrixfederationclient_requests", "", labelnames=["method", SERVER_NAME_LABEL], ) -incoming_responses_counter = Counter( +incoming_responses_counter = SynapseCounter( "synapse_http_matrixfederationclient_responses", "", labelnames=["method", "code", SERVER_NAME_LABEL], diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 5cc8a2ebd8..54995475d4 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -24,52 +24,55 @@ import traceback from typing import Mapping -from prometheus_client.core import Counter, Histogram - from synapse.logging.context import current_context -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import ( + SERVER_NAME_LABEL, + LaterGauge, + SynapseCounter, + SynapseHistogram, +) logger = logging.getLogger(__name__) # total number of responses served, split by method/servlet/tag -response_count = Counter( +response_count = SynapseCounter( "synapse_http_server_response_count", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) -requests_counter = Counter( +requests_counter = SynapseCounter( "synapse_http_server_requests_received", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], ) -outgoing_responses_counter = Counter( +outgoing_responses_counter = SynapseCounter( "synapse_http_server_responses", "", labelnames=["method", "code", SERVER_NAME_LABEL], ) -response_timer = Histogram( +response_timer = SynapseHistogram( "synapse_http_server_response_time_seconds", "sec", labelnames=["method", "servlet", "tag", "code", SERVER_NAME_LABEL], ) -response_ru_utime = Counter( +response_ru_utime = SynapseCounter( "synapse_http_server_response_ru_utime_seconds", "sec", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) -response_ru_stime = Counter( +response_ru_stime = SynapseCounter( "synapse_http_server_response_ru_stime_seconds", "sec", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) -response_db_txn_count = Counter( +response_db_txn_count = SynapseCounter( "synapse_http_server_response_db_txn_count", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], @@ -77,21 +80,21 @@ # seconds spent waiting for db txns, excluding scheduling time, when processing # this request -response_db_txn_duration = Counter( +response_db_txn_duration = SynapseCounter( "synapse_http_server_response_db_txn_duration_seconds", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) # seconds spent waiting for a db connection, when processing this request -response_db_sched_duration = Counter( +response_db_sched_duration = SynapseCounter( "synapse_http_server_response_db_sched_duration_seconds", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) # size in bytes of the response written -response_size = Counter( +response_size = SynapseCounter( "synapse_http_server_response_size", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], @@ -100,19 +103,19 @@ # In flight metrics are incremented while the requests are in flight, rather # than when the response was written. -in_flight_requests_ru_utime = Counter( +in_flight_requests_ru_utime = SynapseCounter( "synapse_http_server_in_flight_requests_ru_utime_seconds", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], ) -in_flight_requests_ru_stime = Counter( +in_flight_requests_ru_stime = SynapseCounter( "synapse_http_server_in_flight_requests_ru_stime_seconds", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], ) -in_flight_requests_db_txn_count = Counter( +in_flight_requests_db_txn_count = SynapseCounter( "synapse_http_server_in_flight_requests_db_txn_count", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], @@ -120,14 +123,14 @@ # seconds spent waiting for db txns, excluding scheduling time, when processing # this request -in_flight_requests_db_txn_duration = Counter( +in_flight_requests_db_txn_duration = SynapseCounter( "synapse_http_server_in_flight_requests_db_txn_duration_seconds", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], ) # seconds spent waiting for a db connection, when processing this request -in_flight_requests_db_sched_duration = Counter( +in_flight_requests_db_sched_duration = SynapseCounter( "synapse_http_server_in_flight_requests_db_sched_duration_seconds", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 5b6e9c94f5..b7fc4b0048 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -27,9 +27,11 @@ import threading from importlib import metadata from typing import ( + Any, Callable, Generic, Iterable, + Literal, Mapping, Optional, Sequence, @@ -39,6 +41,12 @@ ) import attr +from opentelemetry import metrics +from opentelemetry.exporter.prometheus import ( + PrometheusMetricReader, +) +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.resources import SERVICE_NAME, Resource as OtelResource from packaging.version import parse as parse_version from prometheus_client import ( CollectorRegistry, @@ -48,11 +56,13 @@ Metric, generate_latest, ) +from prometheus_client.context_managers import InprogressTracker, Timer from prometheus_client.core import ( REGISTRY, GaugeHistogramMetricFamily, GaugeMetricFamily, ) +from prometheus_client.utils import INF from twisted.python.threadpool import ThreadPool from twisted.web.resource import Resource @@ -134,6 +144,16 @@ def _set_prometheus_client_use_created_metrics(new_value: bool) -> None: # Set this globally so it applies wherever we generate/collect metrics _set_prometheus_client_use_created_metrics(False) +# This will create a Resource that can do the displaying of the prometheus metrics. The +# start_http_server() that is used by the listen_metrics() call in _base.py will pick +# this up and serve it. +resource = OtelResource(attributes={SERVICE_NAME: "synapse"}) +reader = PrometheusMetricReader() +provider = MeterProvider(resource=resource, metric_readers=[reader]) +metrics.set_meter_provider(provider) +# Global meter for registering otel metrics +meter = provider.get_meter("synapse-otel-meter") + class _RegistryProxy: @staticmethod @@ -150,6 +170,173 @@ def collect() -> Iterable[Metric]: RegistryProxy = cast(CollectorRegistry, _RegistryProxy) +class SynapseCounter: + def __init__( + self, + name: str, + documentation: str, + labelnames: Iterable[str] = (), + namespace: str = "", + subsystem: str = "", + unit: str = "", + registry: Optional[CollectorRegistry] = REGISTRY, + _labelvalues: Optional[Sequence[str]] = None, + ) -> None: + # TODO: remove the ignore + self._prometheus_counter = Counter( # type: ignore[missing-server-name-label] + name=name, + documentation=documentation, + labelnames=labelnames, + namespace=namespace, + subsystem=subsystem, + unit=unit, + registry=registry, + _labelvalues=_labelvalues, + ) + # Here is where we grab the global meter to create a FauxCounter + self._otel_counter = meter.create_counter( + name, unit=unit, description=documentation + ) + + def inc(self, amount: float = 1.0) -> None: + self._prometheus_counter.inc(amount=amount) + # Need to verify what happens with Counters that do not have labels as children, + # this may not be appropriate in those cases. Can probably just leave the + # attributes param as empty in that case? + self._otel_counter.add(amount) + + def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Counter: + return self._prometheus_counter.labels(*labelvalues, **labelkwargs) + + def collect(self) -> Iterable[Metric]: + return self._prometheus_counter.collect() + + +class SynapseGauge: + def __init__( + self, + name: str, + documentation: str, + labelnames: Iterable[str] = (), + namespace: str = "", + subsystem: str = "", + unit: str = "", + registry: Optional[CollectorRegistry] = REGISTRY, + _labelvalues: Optional[Sequence[str]] = None, + multiprocess_mode: Literal[ + "all", + "liveall", + "min", + "livemin", + "max", + "livemax", + "sum", + "livesum", + "mostrecent", + "livemostrecent", + ] = "all", + ): + # TODO: remove the type ignore + self._prometheus_gauge = Gauge( # type: ignore[missing-server-name-label] + name=name, + documentation=documentation, + labelnames=labelnames, + namespace=namespace, + subsystem=subsystem, + unit=unit, + registry=registry, + _labelvalues=_labelvalues, + multiprocess_mode=multiprocess_mode, + ) + # Here is where we grab the global meter to create a FauxGauge + self._otel_gauge = meter.create_gauge( + name, unit=unit, description=documentation + ) + + def set(self, value: float) -> None: + self._prometheus_gauge.set(value) + self._otel_gauge.set(value) + + def inc(self, amount: float = 1) -> None: + self._prometheus_gauge.inc(amount) + self._otel_gauge.set(self._prometheus_gauge._value.get()) + + def dec(self, amount: float = 1) -> None: + self._prometheus_gauge.dec(amount) + self._otel_gauge.set(self._prometheus_gauge._value.get()) + + def track_inprogress(self) -> InprogressTracker: + return self._prometheus_gauge.track_inprogress() + + def set_function(self, f: Callable[[], float]) -> None: + self._prometheus_gauge.set_function(f) + # TODO: figure out what's the equivalent for otel here + + def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Gauge: + return self._prometheus_gauge.labels(*labelvalues, **labelkwargs) + + def remove(self, *labelvalues: Any) -> None: + self._prometheus_gauge.remove(*labelvalues) + + def collect(self) -> Iterable[Metric]: + return self._prometheus_gauge.collect() + + +class SynapseHistogram: + DEFAULT_BUCKETS = ( + 0.005, + 0.01, + 0.025, + 0.05, + 0.075, + 0.1, + 0.25, + 0.5, + 0.75, + 1.0, + 2.5, + 5.0, + 7.5, + 10.0, + INF, + ) + + def __init__( + self, + name: str, + documentation: str, + labelnames: Iterable[str] = (), + namespace: str = "", + subsystem: str = "", + unit: str = "", + registry: Optional[CollectorRegistry] = REGISTRY, + _labelvalues: Optional[Sequence[str]] = None, + buckets: Sequence[Union[float, str]] = DEFAULT_BUCKETS, + ): + # TODO: remove the type ignore + self._prometheus_histogram = Histogram( # type: ignore[missing-server-name-label] + name=name, + documentation=documentation, + labelnames=labelnames, + buckets=buckets, + ) + self._otel_histogram = meter.create_histogram( + name, + unit=unit, + description=documentation, + # prometheus_client accepts "INF" as a boundary, but otel doesn't, we just remove it + explicit_bucket_boundaries_advisory=[ + x for x in buckets if isinstance(x, float) + ], + ) + + def time(self) -> Timer: + return self._prometheus_histogram.time() + + def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Histogram: + return self._prometheus_histogram.labels(*labelvalues, **labelkwargs) + + @attr.s(slots=True, hash=True, auto_attribs=True, kw_only=True) class LaterGauge(Collector): """A Gauge which periodically calls a user-provided callback to produce metrics.""" @@ -600,21 +787,21 @@ def collect(self) -> Iterable[Metric]: # Federation Metrics # -sent_transactions_counter = Counter( +sent_transactions_counter = SynapseCounter( "synapse_federation_client_sent_transactions", "", labelnames=[SERVER_NAME_LABEL] ) -events_processed_counter = Counter( +events_processed_counter = SynapseCounter( "synapse_federation_client_events_processed", "", labelnames=[SERVER_NAME_LABEL] ) -event_processing_loop_counter = Counter( +event_processing_loop_counter = SynapseCounter( "synapse_event_processing_loop_count", "Event processing loop iterations", labelnames=["name", SERVER_NAME_LABEL], ) -event_processing_loop_room_count = Counter( +event_processing_loop_room_count = SynapseCounter( "synapse_event_processing_loop_room_count", "Rooms seen per event processing loop iteration", labelnames=["name", SERVER_NAME_LABEL], @@ -623,29 +810,29 @@ def collect(self) -> Iterable[Metric]: # Used to track where various components have processed in the event stream, # e.g. federation sending, appservice sending, etc. -event_processing_positions = Gauge( +event_processing_positions = SynapseGauge( "synapse_event_processing_positions", "", labelnames=["name", SERVER_NAME_LABEL] ) # Used to track the current max events stream position -event_persisted_position = Gauge( +event_persisted_position = SynapseGauge( "synapse_event_persisted_position", "", labelnames=[SERVER_NAME_LABEL] ) # Used to track the received_ts of the last event processed by various # components -event_processing_last_ts = Gauge( +event_processing_last_ts = SynapseGauge( "synapse_event_processing_last_ts", "", labelnames=["name", SERVER_NAME_LABEL] ) # Used to track the lag processing events. This is the time difference # between the last processed event's received_ts and the time it was # finished being processed. -event_processing_lag = Gauge( +event_processing_lag = SynapseGauge( "synapse_event_processing_lag", "", labelnames=["name", SERVER_NAME_LABEL] ) -event_processing_lag_by_event = Histogram( +event_processing_lag_by_event = SynapseHistogram( "synapse_event_processing_lag_by_event", "Time between an event being persisted and it being queued up to be sent to the relevant remote servers", labelnames=["name", SERVER_NAME_LABEL], @@ -656,7 +843,7 @@ def collect(self) -> Iterable[Metric]: # This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`. We # consider this process-level because all Synapse homeservers running in the process # will use the same Synapse version. -build_info = Gauge( # type: ignore[missing-server-name-label] +build_info = SynapseGauge( "synapse_build_info", "Build information", ["pythonversion", "version", "osversion"] ) build_info.labels( @@ -666,14 +853,14 @@ def collect(self) -> Iterable[Metric]: ).set(1) # Loaded modules info -module_instances_info = Gauge( +module_instances_info = SynapseGauge( "synapse_module_info", "Information about loaded modules", labelnames=["package_name", "module_name", "module_version", SERVER_NAME_LABEL], ) # 3PID send info -threepid_send_requests = Histogram( +threepid_send_requests = SynapseHistogram( "synapse_threepid_send_requests_with_tries", documentation="Number of requests for a 3pid token by try count. Note if" " there is a request with try count of 4, then there would have been one" @@ -682,38 +869,38 @@ def collect(self) -> Iterable[Metric]: labelnames=("type", "reason", SERVER_NAME_LABEL), ) -threadpool_total_threads = Gauge( +threadpool_total_threads = SynapseGauge( "synapse_threadpool_total_threads", "Total number of threads currently in the threadpool", labelnames=["name", SERVER_NAME_LABEL], ) -threadpool_total_working_threads = Gauge( +threadpool_total_working_threads = SynapseGauge( "synapse_threadpool_working_threads", "Number of threads currently working in the threadpool", labelnames=["name", SERVER_NAME_LABEL], ) -threadpool_total_min_threads = Gauge( +threadpool_total_min_threads = SynapseGauge( "synapse_threadpool_min_threads", "Minimum number of threads configured in the threadpool", labelnames=["name", SERVER_NAME_LABEL], ) -threadpool_total_max_threads = Gauge( +threadpool_total_max_threads = SynapseGauge( "synapse_threadpool_max_threads", "Maximum number of threads configured in the threadpool", labelnames=["name", SERVER_NAME_LABEL], ) # Gauges for room counts -known_rooms_gauge = Gauge( +known_rooms_gauge = SynapseGauge( "synapse_known_rooms_total", "Total number of rooms", labelnames=[SERVER_NAME_LABEL], ) -locally_joined_rooms_gauge = Gauge( +locally_joined_rooms_gauge = SynapseGauge( "synapse_locally_joined_rooms_total", "Total number of locally joined rooms", labelnames=[SERVER_NAME_LABEL], @@ -771,4 +958,7 @@ def render_GET(self, request: Request) -> bytes: "GaugeBucketCollector", "MIN_TIME_BETWEEN_GCS", "install_gc_manager", + "SynapseCounter", + "SynapseGauge", + "SynapseHistogram", ] diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index c871598680..1d793bc172 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -38,7 +38,7 @@ ) from prometheus_client import Metric -from prometheus_client.core import REGISTRY, Counter, Gauge +from prometheus_client.core import REGISTRY from typing_extensions import Concatenate, ParamSpec from twisted.internet import defer @@ -54,7 +54,7 @@ start_active_span, start_active_span_follows_from, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseGauge from synapse.metrics._types import Collector if TYPE_CHECKING: @@ -74,13 +74,13 @@ logger = logging.getLogger(__name__) -_background_process_start_count = Counter( +_background_process_start_count = SynapseCounter( "synapse_background_process_start_count", "Number of background processes started", labelnames=["name", SERVER_NAME_LABEL], ) -_background_process_in_flight_count = Gauge( +_background_process_in_flight_count = SynapseGauge( "synapse_background_process_in_flight_count", "Number of background processes in flight", labelnames=["name", SERVER_NAME_LABEL], @@ -90,28 +90,28 @@ # the default registry. Instead we collect them all via the CustomCollector, # which ensures that we can update them before they are collected. # -_background_process_ru_utime = Counter( +_background_process_ru_utime = SynapseCounter( "synapse_background_process_ru_utime_seconds", "User CPU time used by background processes, in seconds", labelnames=["name", SERVER_NAME_LABEL], registry=None, ) -_background_process_ru_stime = Counter( +_background_process_ru_stime = SynapseCounter( "synapse_background_process_ru_stime_seconds", "System CPU time used by background processes, in seconds", labelnames=["name", SERVER_NAME_LABEL], registry=None, ) -_background_process_db_txn_count = Counter( +_background_process_db_txn_count = SynapseCounter( "synapse_background_process_db_txn_count", "Number of database transactions done by background processes", labelnames=["name", SERVER_NAME_LABEL], registry=None, ) -_background_process_db_txn_duration = Counter( +_background_process_db_txn_duration = SynapseCounter( "synapse_background_process_db_txn_duration_seconds", ( "Seconds spent by background processes waiting for database " @@ -121,7 +121,7 @@ registry=None, ) -_background_process_db_sched_duration = Counter( +_background_process_db_sched_duration = SynapseCounter( "synapse_background_process_db_sched_duration_seconds", "Seconds spent by background processes waiting for database connections", labelnames=["name", SERVER_NAME_LABEL], diff --git a/synapse/metrics/common_usage_metrics.py b/synapse/metrics/common_usage_metrics.py index 0c3f380177..eaa5d2fdf2 100644 --- a/synapse/metrics/common_usage_metrics.py +++ b/synapse/metrics/common_usage_metrics.py @@ -23,35 +23,34 @@ import attr -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge if TYPE_CHECKING: from synapse.server import HomeServer -from prometheus_client import Gauge # Gauge to expose daily active users metrics -current_dau_gauge = Gauge( +current_dau_gauge = SynapseGauge( "synapse_admin_daily_active_users", "Current daily active users count", labelnames=[SERVER_NAME_LABEL], ) # Gauge for users -users_in_status_gauge = Gauge( +users_in_status_gauge = SynapseGauge( "synapse_user_count", "Number of users in active, deactivated, suspended, and locked status", ["status", SERVER_NAME_LABEL], ) -users_in_time_ranges_gauge = Gauge( +users_in_time_ranges_gauge = SynapseGauge( "synapse_active_users", "Number of active users in time ranges in 24h, 7d, and 30d", ["time_range", SERVER_NAME_LABEL], ) # We may want to add additional ranges in the future. -retained_users_gauge = Gauge( +retained_users_gauge = SynapseGauge( "synapse_retained_users", "Number of retained users in 30d", ["time_range", SERVER_NAME_LABEL], diff --git a/synapse/notifier.py b/synapse/notifier.py index 4a75d07e37..2ab5e45e66 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -35,7 +35,6 @@ ) import attr -from prometheus_client import Counter from twisted.internet import defer from twisted.internet.defer import Deferred @@ -47,7 +46,7 @@ from synapse.logging import issue9533_logger from synapse.logging.context import PreserveLoggingContext from synapse.logging.opentracing import log_kv, start_active_span -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter from synapse.streams.config import PaginationConfig from synapse.types import ( ISynapseReactor, @@ -72,11 +71,11 @@ logger = logging.getLogger(__name__) # FIXME: Unused metric, remove if not needed. -notified_events_counter = Counter( +notified_events_counter = SynapseCounter( "synapse_notifier_notified_events", "", labelnames=[SERVER_NAME_LABEL] ) -users_woken_by_stream_counter = Counter( +users_woken_by_stream_counter = SynapseCounter( "synapse_notifier_users_woken_by_stream", "", labelnames=["stream", SERVER_NAME_LABEL], diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 9fcd7fdc6e..c62d730ec5 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -31,8 +31,6 @@ cast, ) -from prometheus_client import Counter - from twisted.internet.defer import Deferred from synapse.api.constants import ( @@ -47,7 +45,7 @@ from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext, EventPersistencePair from synapse.logging.context import make_deferred_yieldable, run_in_background -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.state import CREATE_KEY, POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership from synapse.storage.invite_rule import InviteRule @@ -67,13 +65,13 @@ logger = logging.getLogger(__name__) # FIXME: Unused metric, remove if not needed. -push_rules_invalidation_counter = Counter( +push_rules_invalidation_counter = SynapseCounter( "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter", "", labelnames=[SERVER_NAME_LABEL], ) # FIXME: Unused metric, remove if not needed. -push_rules_state_size_counter = Counter( +push_rules_state_size_counter = SynapseCounter( "synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter", "", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 8df106b859..bba47808b0 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -23,15 +23,13 @@ import urllib.parse from typing import TYPE_CHECKING, Optional, Union -from prometheus_client import Counter - from twisted.internet.error import AlreadyCalled, AlreadyCancelled from twisted.internet.interfaces import IDelayedCall from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.logging import opentracing -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.push import Pusher, PusherConfig, PusherConfigException from synapse.storage.databases.main.event_push_actions import HttpPushAction from synapse.types import JsonDict, JsonMapping @@ -43,25 +41,25 @@ logger = logging.getLogger(__name__) -http_push_processed_counter = Counter( +http_push_processed_counter = SynapseCounter( "synapse_http_httppusher_http_pushes_processed", "Number of push notifications successfully sent", labelnames=[SERVER_NAME_LABEL], ) -http_push_failed_counter = Counter( +http_push_failed_counter = SynapseCounter( "synapse_http_httppusher_http_pushes_failed", "Number of push notifications which failed", labelnames=[SERVER_NAME_LABEL], ) -http_badges_processed_counter = Counter( +http_badges_processed_counter = SynapseCounter( "synapse_http_httppusher_badge_updates_processed", "Number of badge updates successfully sent", labelnames=[SERVER_NAME_LABEL], ) -http_badges_failed_counter = Counter( +http_badges_failed_counter = SynapseCounter( "synapse_http_httppusher_badge_updates_failed", "Number of badge updates which failed", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 3dac61aed5..73ac13196a 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -26,13 +26,12 @@ import bleach import jinja2 from markupsafe import Markup -from prometheus_client import Counter from synapse.api.constants import EventContentFields, EventTypes, Membership, RoomTypes from synapse.api.errors import StoreError from synapse.config.emailconfig import EmailSubjectConfig from synapse.events import EventBase -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.push.presentable_names import ( calculate_room_name, descriptor_from_member_events, @@ -58,7 +57,7 @@ T = TypeVar("T") -emails_sent_counter = Counter( +emails_sent_counter = SynapseCounter( "synapse_emails_sent_total", "Emails sent by type", labelnames=["type", SERVER_NAME_LABEL], diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index 6b70de976a..6350e4f5b1 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -22,10 +22,8 @@ import logging from typing import TYPE_CHECKING, Iterable, Optional -from prometheus_client import Gauge - from synapse.api.errors import Codes, SynapseError -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.metrics.background_process_metrics import ( wrap_as_background_process, ) @@ -46,7 +44,7 @@ logger = logging.getLogger(__name__) -synapse_pushers = Gauge( +synapse_pushers = SynapseGauge( "synapse_pushers", "Number of active synapse pushers", labelnames=["kind", "app_id", SERVER_NAME_LABEL], diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index d76b40cf39..1809605f06 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -25,8 +25,6 @@ from inspect import signature from typing import TYPE_CHECKING, Any, Awaitable, Callable, ClassVar -from prometheus_client import Counter, Gauge - from twisted.internet.error import ConnectError, DNSLookupError from twisted.web.server import Request @@ -38,7 +36,7 @@ from synapse.http.site import SynapseRequest from synapse.logging import opentracing from synapse.logging.opentracing import trace_with_opname -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseGauge from synapse.types import JsonDict from synapse.util.caches.response_cache import ResponseCache from synapse.util.cancellation import is_function_cancellable @@ -49,13 +47,13 @@ logger = logging.getLogger(__name__) -_pending_outgoing_requests = Gauge( +_pending_outgoing_requests = SynapseGauge( "synapse_pending_outgoing_replication_requests", "Number of active outgoing replication requests, by replication method name", labelnames=["name", SERVER_NAME_LABEL], ) -_outgoing_request_counter = Counter( +_outgoing_request_counter = SynapseCounter( "synapse_outgoing_replication_requests", "Number of outgoing replication requests, by replication method name and result", labelnames=["name", "code", SERVER_NAME_LABEL], diff --git a/synapse/replication/tcp/external_cache.py b/synapse/replication/tcp/external_cache.py index bcdd55d2e6..36ace55c53 100644 --- a/synapse/replication/tcp/external_cache.py +++ b/synapse/replication/tcp/external_cache.py @@ -22,11 +22,9 @@ import logging from typing import TYPE_CHECKING, Any, Optional -from prometheus_client import Counter, Histogram - from synapse.logging import opentracing from synapse.logging.context import make_deferred_yieldable -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram from synapse.util.json import json_decoder, json_encoder if TYPE_CHECKING: @@ -34,19 +32,19 @@ from synapse.server import HomeServer -set_counter = Counter( +set_counter = SynapseCounter( "synapse_external_cache_set", "Number of times we set a cache", labelnames=["cache_name", SERVER_NAME_LABEL], ) -get_counter = Counter( +get_counter = SynapseCounter( "synapse_external_cache_get", "Number of times we get a cache", labelnames=["cache_name", "hit", SERVER_NAME_LABEL], ) -response_timer = Histogram( +response_timer = SynapseHistogram( "synapse_external_cache_response_time_seconds", "Time taken to get a response from Redis for a cache get/set request", labelnames=["method", SERVER_NAME_LABEL], diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index bd1ee5ff9d..f0c7ec7748 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -32,11 +32,9 @@ Union, ) -from prometheus_client import Counter - from twisted.internet.protocol import ReconnectingClientFactory -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter from synapse.replication.tcp.commands import ( ClearUserSyncsCommand, Command, @@ -79,25 +77,25 @@ # number of updates received for each RDATA stream -inbound_rdata_count = Counter( +inbound_rdata_count = SynapseCounter( "synapse_replication_tcp_protocol_inbound_rdata_count", "", labelnames=["stream_name", SERVER_NAME_LABEL], ) -user_sync_counter = Counter( +user_sync_counter = SynapseCounter( "synapse_replication_tcp_resource_user_sync", "", labelnames=[SERVER_NAME_LABEL] ) -federation_ack_counter = Counter( +federation_ack_counter = SynapseCounter( "synapse_replication_tcp_resource_federation_ack", "", labelnames=[SERVER_NAME_LABEL], ) # FIXME: Unused metric, remove if not needed. -remove_pusher_counter = Counter( +remove_pusher_counter = SynapseCounter( "synapse_replication_tcp_resource_remove_pusher", "", labelnames=[SERVER_NAME_LABEL] ) -user_ip_cache_counter = Counter( +user_ip_cache_counter = SynapseCounter( "synapse_replication_tcp_resource_user_ip_cache", "", labelnames=[SERVER_NAME_LABEL] ) diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 733643cb64..aabc0f0201 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -30,7 +30,6 @@ from inspect import isawaitable from typing import TYPE_CHECKING, Any, Collection, Optional -from prometheus_client import Counter from zope.interface import Interface, implementer from twisted.internet import task @@ -39,7 +38,7 @@ from twisted.python.failure import Failure from synapse.logging.context import PreserveLoggingContext -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter from synapse.metrics.background_process_metrics import ( BackgroundProcessLoggingContext, ) @@ -62,19 +61,19 @@ from synapse.server import HomeServer -connection_close_counter = Counter( +connection_close_counter = SynapseCounter( "synapse_replication_tcp_protocol_close_reason", "", labelnames=["reason_type", SERVER_NAME_LABEL], ) -tcp_inbound_commands_counter = Counter( +tcp_inbound_commands_counter = SynapseCounter( "synapse_replication_tcp_protocol_inbound_commands", "Number of commands received from replication, by command and name of process connected to", labelnames=["command", "name", SERVER_NAME_LABEL], ) -tcp_outbound_commands_counter = Counter( +tcp_outbound_commands_counter = SynapseCounter( "synapse_replication_tcp_protocol_outbound_commands", "Number of commands sent to replication, by command and name of process connected to", labelnames=["command", "name", SERVER_NAME_LABEL], diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 8df0a3853f..f1d0b17e0d 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -24,12 +24,10 @@ import random from typing import TYPE_CHECKING, Optional -from prometheus_client import Counter - from twisted.internet.interfaces import IAddress from twisted.internet.protocol import ServerFactory -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.replication.tcp.commands import PositionCommand from synapse.replication.tcp.protocol import ServerReplicationStreamProtocol from synapse.replication.tcp.streams import EventsStream @@ -39,7 +37,7 @@ if TYPE_CHECKING: from synapse.server import HomeServer -stream_updates_counter = Counter( +stream_updates_counter = SynapseCounter( "synapse_replication_tcp_resource_stream_updates", "", labelnames=["stream_name", SERVER_NAME_LABEL], diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 38e315d0e7..7f4b8aced0 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -28,8 +28,6 @@ from typing import TYPE_CHECKING, Awaitable, Optional from urllib import parse as urlparse -from prometheus_client.core import Histogram - from twisted.web.server import Request from synapse import event_auth @@ -65,7 +63,7 @@ from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.opentracing import set_tag -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram from synapse.rest.client._base import client_patterns from synapse.rest.client.transactions import HttpTransactionCache from synapse.state import CREATE_KEY, POWER_KEY @@ -113,7 +111,7 @@ def from_member_count(member_count: int) -> "_RoomSize": # greater than 10s. We use a separate dedicated histogram with its own buckets # so that we don't increase the cardinality of the general one because it's # multiplied across hundreds of servlets. -messsages_response_timer = Histogram( +messsages_response_timer = SynapseHistogram( "synapse_room_message_list_rest_servlet_response_time_seconds", "sec", # We have a label for room size so we can try to see a more realistic diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 991e1f847a..1e3c267183 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -33,7 +33,6 @@ import attr from immutabledict import immutabledict -from prometheus_client import Counter, Histogram from synapse.api.constants import EventTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersions @@ -45,7 +44,7 @@ ) from synapse.logging.context import ContextResourceUsage from synapse.logging.opentracing import tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.event_federation import StateDifference @@ -67,7 +66,7 @@ metrics_logger = logging.getLogger("synapse.state.metrics") # Metrics for number of state groups involved in a resolution. -state_groups_histogram = Histogram( +state_groups_histogram = SynapseHistogram( "synapse_state_number_state_groups_in_resolution", "Number of state groups used when performing a state resolution", labelnames=[SERVER_NAME_LABEL], @@ -600,25 +599,25 @@ class _StateResMetrics: db_events: int = 0 -_biggest_room_by_cpu_counter = Counter( +_biggest_room_by_cpu_counter = SynapseCounter( "synapse_state_res_cpu_for_biggest_room_seconds", "CPU time spent performing state resolution for the single most expensive " "room for state resolution", labelnames=[SERVER_NAME_LABEL], ) -_biggest_room_by_db_counter = Counter( +_biggest_room_by_db_counter = SynapseCounter( "synapse_state_res_db_for_biggest_room_seconds", "Database time spent performing state resolution for the single most " "expensive room for state resolution", labelnames=[SERVER_NAME_LABEL], ) -_cpu_times = Histogram( +_cpu_times = SynapseHistogram( "synapse_state_res_cpu_for_all_rooms_seconds", "CPU time (utime+stime) spent computing a single state resolution", labelnames=[SERVER_NAME_LABEL], ) -_db_times = Histogram( +_db_times = SynapseHistogram( "synapse_state_res_db_for_all_rooms_seconds", "Database time spent computing a single state resolution", labelnames=[SERVER_NAME_LABEL], @@ -858,7 +857,7 @@ def _report_biggest( self, extract_key: Callable[[_StateResMetrics], Any], metric_name: str, - prometheus_counter_metric: Counter, + prometheus_counter_metric: SynapseCounter, ) -> None: """Report metrics on the biggest rooms for state res diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 0daf4830d9..f9d9934b56 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -40,7 +40,6 @@ ) import attr -from prometheus_client import Counter, Histogram from twisted.internet import defer @@ -56,7 +55,7 @@ start_active_span_follows_from, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState @@ -77,13 +76,13 @@ logger = logging.getLogger(__name__) # The number of times we are recalculating the current state -state_delta_counter = Counter( +state_delta_counter = SynapseCounter( "synapse_storage_events_state_delta", "", labelnames=[SERVER_NAME_LABEL] ) # The number of times we are recalculating state when there is only a # single forward extremity -state_delta_single_event_counter = Counter( +state_delta_single_event_counter = SynapseCounter( "synapse_storage_events_state_delta_single_event", "", labelnames=[SERVER_NAME_LABEL], @@ -92,12 +91,12 @@ # The number of times we are reculating state when we could have resonably # calculated the delta when we calculated the state for an event we were # persisting. -state_delta_reuse_delta_counter = Counter( +state_delta_reuse_delta_counter = SynapseCounter( "synapse_storage_events_state_delta_reuse_delta", "", labelnames=[SERVER_NAME_LABEL] ) # The number of forward extremities for each new event. -forward_extremities_counter = Histogram( +forward_extremities_counter = SynapseHistogram( "synapse_storage_events_forward_extremities_persisted", "Number of forward extremities for each new event", labelnames=[SERVER_NAME_LABEL], @@ -106,26 +105,26 @@ # The number of stale forward extremities for each new event. Stale extremities # are those that were in the previous set of extremities as well as the new. -stale_forward_extremities_counter = Histogram( +stale_forward_extremities_counter = SynapseHistogram( "synapse_storage_events_stale_forward_extremities_persisted", "Number of unchanged forward extremities for each new event", labelnames=[SERVER_NAME_LABEL], buckets=(0, 1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"), ) -state_resolutions_during_persistence = Counter( +state_resolutions_during_persistence = SynapseCounter( "synapse_storage_events_state_resolutions_during_persistence", "Number of times we had to do state res to calculate new current state", labelnames=[SERVER_NAME_LABEL], ) -potential_times_prune_extremities = Counter( +potential_times_prune_extremities = SynapseCounter( "synapse_storage_events_potential_times_prune_extremities", "Number of times we might be able to prune extremities", labelnames=[SERVER_NAME_LABEL], ) -times_pruned_extremities = Counter( +times_pruned_extremities = SynapseCounter( "synapse_storage_events_times_pruned_extremities", "Number of times we were actually be able to prune extremities", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/storage/database.py b/synapse/storage/database.py index b7f870bd26..a7f2eee04d 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -43,7 +43,6 @@ ) import attr -from prometheus_client import Counter, Histogram from typing_extensions import Concatenate, ParamSpec from twisted.enterprise import adbapi @@ -57,7 +56,12 @@ current_context, make_deferred_yieldable, ) -from synapse.metrics import SERVER_NAME_LABEL, register_threadpool +from synapse.metrics import ( + SERVER_NAME_LABEL, + SynapseCounter, + SynapseHistogram, + register_threadpool, +) from synapse.storage.background_updates import BackgroundUpdater from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.types import Connection, Cursor, SQLQueryParameters @@ -77,19 +81,19 @@ transaction_logger = logging.getLogger("synapse.storage.txn") perf_logger = logging.getLogger("synapse.storage.TIME") -sql_scheduling_timer = Histogram( +sql_scheduling_timer = SynapseHistogram( "synapse_storage_schedule_time", "sec", labelnames=[SERVER_NAME_LABEL] ) -sql_query_timer = Histogram( +sql_query_timer = SynapseHistogram( "synapse_storage_query_time", "sec", labelnames=["verb", SERVER_NAME_LABEL] ) -sql_txn_count = Counter( +sql_txn_count = SynapseCounter( "synapse_storage_transaction_time_count", "sec", labelnames=["desc", SERVER_NAME_LABEL], ) -sql_txn_duration = Counter( +sql_txn_duration = SynapseCounter( "synapse_storage_transaction_time_sum", "sec", labelnames=["desc", SERVER_NAME_LABEL], diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 0a8571f0c8..c4eadee5b9 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -33,14 +33,13 @@ ) import attr -from prometheus_client import Counter, Gauge from synapse.api.constants import MAX_DEPTH from synapse.api.errors import StoreError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.events import EventBase, make_event_from_dict from synapse.logging.opentracing import tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseGauge from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.background_updates import ForeignKeyConstraint @@ -63,19 +62,19 @@ if TYPE_CHECKING: from synapse.server import HomeServer -oldest_pdu_in_federation_staging = Gauge( +oldest_pdu_in_federation_staging = SynapseGauge( "synapse_federation_server_oldest_inbound_pdu_in_staging", "The age in seconds since we received the oldest pdu in the federation staging area", labelnames=[SERVER_NAME_LABEL], ) -number_pdus_in_federation_queue = Gauge( +number_pdus_in_federation_queue = SynapseGauge( "synapse_federation_server_number_inbound_pdu_in_staging", "The total number of events in the inbound federation staging", labelnames=[SERVER_NAME_LABEL], ) -pdus_pruned_from_federation_queue = Counter( +pdus_pruned_from_federation_queue = SynapseCounter( "synapse_federation_server_number_inbound_pdu_pruned", "The number of events in the inbound federation staging that have been " "pruned due to the queue getting too long", diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index da9ecfbdb9..0013e59f52 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -36,7 +36,6 @@ ) import attr -from prometheus_client import Counter import synapse.metrics from synapse.api.constants import ( @@ -56,7 +55,7 @@ from synapse.events.snapshot import EventPersistencePair from synapse.events.utils import parse_stripped_state_event from synapse.logging.opentracing import trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -91,10 +90,10 @@ logger = logging.getLogger(__name__) -persist_event_counter = Counter( +persist_event_counter = SynapseCounter( "synapse_storage_events_persisted_events", "", labelnames=[SERVER_NAME_LABEL] ) -event_counter = Counter( +event_counter = SynapseCounter( "synapse_storage_events_persisted_events_sep", "", labelnames=["type", "origin_type", "origin_entity", SERVER_NAME_LABEL], diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 005f75a2d8..da7d610e4a 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -37,7 +37,6 @@ ) import attr -from prometheus_client import Gauge from twisted.internet import defer @@ -64,7 +63,7 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.metrics.background_process_metrics import ( wrap_as_background_process, ) @@ -132,7 +131,7 @@ def __init__( EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events -event_fetch_ongoing_gauge = Gauge( +event_fetch_ongoing_gauge = SynapseGauge( "synapse_event_fetch_ongoing", "The number of event fetchers that are running", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/util/batching_queue.py b/synapse/util/batching_queue.py index 514abcbec1..7e96b35086 100644 --- a/synapse/util/batching_queue.py +++ b/synapse/util/batching_queue.py @@ -34,7 +34,7 @@ from twisted.internet import defer from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.util.clock import Clock if TYPE_CHECKING: @@ -46,19 +46,19 @@ V = TypeVar("V") R = TypeVar("R") -number_queued = Gauge( +number_queued = SynapseGauge( "synapse_util_batching_queue_number_queued", "The number of items waiting in the queue across all keys", labelnames=("name", SERVER_NAME_LABEL), ) -number_in_flight = Gauge( +number_in_flight = SynapseGauge( "synapse_util_batching_queue_number_pending", "The number of items across all keys either being processed or waiting in a queue", labelnames=("name", SERVER_NAME_LABEL), ) -number_of_keys = Gauge( +number_of_keys = SynapseGauge( "synapse_util_batching_queue_number_of_keys", "The number of distinct keys that have items queued", labelnames=("name", SERVER_NAME_LABEL), diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index 380f2a78ca..fdf0fc48d6 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -35,18 +35,16 @@ cast, ) -from prometheus_client import Gauge - from twisted.internet import defer from twisted.python.failure import Failure -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.lrucache import LruCache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry from synapse.util.clock import Clock -cache_pending_metric = Gauge( +cache_pending_metric = SynapseGauge( "synapse_util_caches_cache_pending", "Number of lookups currently pending for this cache", labelnames=["name", SERVER_NAME_LABEL], diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 6d1adf1131..7163540ab3 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -31,7 +31,7 @@ TypeVar, ) -from prometheus_client import CollectorRegistry, Counter, Metric +from prometheus_client import CollectorRegistry, Metric from typing_extensions import Concatenate, ParamSpec from synapse.logging.context import ( @@ -39,42 +39,42 @@ LoggingContext, current_context, ) -from synapse.metrics import SERVER_NAME_LABEL, InFlightGauge +from synapse.metrics import SERVER_NAME_LABEL, InFlightGauge, SynapseCounter from synapse.util.clock import Clock logger = logging.getLogger(__name__) # Metrics to see the number of and how much time is spend in various blocks of code. # -block_counter = Counter( +block_counter = SynapseCounter( "synapse_util_metrics_block_count", documentation="The number of times this block has been called.", labelnames=["block_name", SERVER_NAME_LABEL], ) """The number of times this block has been called.""" -block_timer = Counter( +block_timer = SynapseCounter( "synapse_util_metrics_block_time_seconds", documentation="The cumulative time spent executing this block across all calls, in seconds.", labelnames=["block_name", SERVER_NAME_LABEL], ) """The cumulative time spent executing this block across all calls, in seconds.""" -block_ru_utime = Counter( +block_ru_utime = SynapseCounter( "synapse_util_metrics_block_ru_utime_seconds", documentation="Resource usage: user CPU time in seconds used in this block", labelnames=["block_name", SERVER_NAME_LABEL], ) """Resource usage: user CPU time in seconds used in this block""" -block_ru_stime = Counter( +block_ru_stime = SynapseCounter( "synapse_util_metrics_block_ru_stime_seconds", documentation="Resource usage: system CPU time in seconds used in this block", labelnames=["block_name", SERVER_NAME_LABEL], ) """Resource usage: system CPU time in seconds used in this block""" -block_db_txn_count = Counter( +block_db_txn_count = SynapseCounter( "synapse_util_metrics_block_db_txn_count", documentation="Number of database transactions completed in this block", labelnames=["block_name", SERVER_NAME_LABEL], @@ -82,7 +82,7 @@ """Number of database transactions completed in this block""" # seconds spent waiting for db txns, excluding scheduling time, in this block -block_db_txn_duration = Counter( +block_db_txn_duration = SynapseCounter( "synapse_util_metrics_block_db_txn_duration_seconds", documentation="Seconds spent waiting for database txns, excluding scheduling time, in this block", labelnames=["block_name", SERVER_NAME_LABEL], @@ -90,7 +90,7 @@ """Seconds spent waiting for database txns, excluding scheduling time, in this block""" # seconds spent waiting for a db connection, in this block -block_db_sched_duration = Counter( +block_db_sched_duration = SynapseCounter( "synapse_util_metrics_block_db_sched_duration_seconds", documentation="Seconds spent waiting for a db connection, in this block", labelnames=["block_name", SERVER_NAME_LABEL], diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 37d2e4505d..d5309f73f4 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -35,8 +35,6 @@ ) from weakref import WeakSet -from prometheus_client.core import Counter - from twisted.internet import defer from synapse.api.errors import LimitExceededError @@ -47,7 +45,12 @@ run_in_background, ) from synapse.logging.opentracing import start_active_span -from synapse.metrics import SERVER_NAME_LABEL, Histogram, LaterGauge +from synapse.metrics import ( + SERVER_NAME_LABEL, + LaterGauge, + SynapseCounter, + SynapseHistogram, +) from synapse.util.clock import Clock if typing.TYPE_CHECKING: @@ -57,17 +60,17 @@ # Track how much the ratelimiter is affecting requests -rate_limit_sleep_counter = Counter( +rate_limit_sleep_counter = SynapseCounter( "synapse_rate_limit_sleep", "Number of requests slept by the rate limiter", labelnames=["rate_limiter_name", SERVER_NAME_LABEL], ) -rate_limit_reject_counter = Counter( +rate_limit_reject_counter = SynapseCounter( "synapse_rate_limit_reject", "Number of requests rejected by the rate limiter", labelnames=["rate_limiter_name", SERVER_NAME_LABEL], ) -queue_wait_timer = Histogram( +queue_wait_timer = SynapseHistogram( "synapse_rate_limit_queue_wait_time_seconds", "Amount of time spent waiting for the rate limiter to let our request through.", labelnames=["rate_limiter_name", SERVER_NAME_LABEL], diff --git a/tests/util/test_batching_queue.py b/tests/util/test_batching_queue.py index 30b07dc6ad..8ce7acaba1 100644 --- a/tests/util/test_batching_queue.py +++ b/tests/util/test_batching_queue.py @@ -19,11 +19,11 @@ # # -from prometheus_client import Gauge from twisted.internet import defer from synapse.logging.context import make_deferred_yieldable +from synapse.metrics import SynapseGauge from synapse.util.batching_queue import ( BatchingQueue, number_in_flight, @@ -59,7 +59,7 @@ async def _process_queue(self, values: list[str]) -> str: self._pending_calls.append((values, d)) return await make_deferred_yieldable(d) - def _get_sample_with_name(self, metric: Gauge, name: str) -> float: + def _get_sample_with_name(self, metric: SynapseGauge, name: str) -> float: """For a prometheus metric get the value of the sample that has a matching "name" label. """